Example #1
def start_workers(num_agents: int = 1,
                  env_name: str = '',
                  state_builder: StateBuilder = None,
                  num_epochs: int = 2001,
                  update_frequency: int = 10):
    """Spawn one training thread per agent for the given environment.

    Each agent gets its own Controller/QLearner pair; the learner is
    attached after construction because the Controller exposes the
    action space the learner needs. Threads are fire-and-forget: they
    are started here but never joined.
    """
    State.num_agents = num_agents

    for agent_id in range(num_agents):
        # Controller first: it owns the env and knows the action space.
        ctrl = Controller(learner=None,
                          env_id=env_name,
                          state_builder=state_builder,
                          update_freq=update_frequency,
                          id=agent_id)

        ctrl.set_learner(QLearner(ctrl.get_action_space(),
                                  epsilon=0.1,
                                  init_alpha=.5,
                                  gamma=.9,
                                  decay_rate=.999))

        # Each agent saves its model under a per-agent path.
        save_path = '../models/{}-{}.model'.format(env_name, agent_id)
        worker = threading.Thread(target=ctrl.train,
                                  kwargs={"number_epochs": num_epochs,
                                          "save_location": save_path})
        worker.start()
Example #2
def main():
    """Train three independent learners on CartPole and plot the mean
    cumulative reward with a standard-error confidence band.
    """
    # Taxi-v2
    cart_pole_ctrl = Controller(None,
                                'CartPole-v1',
                                StateBuilderCartPole(),
                                communicate=False)
    # cart_pole_ctrl = Controller(None, 'Taxi-v2', None, communicate=False)
    # cart_pole_ctrl = Controller(None, 'LunarLander-v2', state_builder=StateBuilderLunarLander(), communicate=False)
    # cart_pole_ctrl = Controller(None, 'FrozenLake-v0', None, communicate=False)

    reward_runs = []
    for _ in range(3):
        # Fresh learner per run so the three runs are independent.
        cart_pole_ctrl.set_learner(QLearner(cart_pole_ctrl.get_action_space(),
                                            epsilon=0.1,
                                            init_alpha=.5,
                                            gamma=.9,
                                            decay_rate=.999))
        cumulative_reward, num_steps = cart_pole_ctrl.train(number_epochs=2001)
        reward_runs.append(cumulative_reward)

    rewards = np.array(reward_runs)
    means = np.mean(rewards, axis=0)

    # Standard error of the mean across runs gives the confidence band.
    sems = scipy.stats.sem(rewards, axis=0)
    upper_band = means + sems
    lower_band = means - sems

    epochs = np.arange(0, len(means))

    # Smooth the mean curve with a degree-5 polynomial fit before plotting.
    smoothed = np.poly1d(np.polyfit(epochs, means, 5))
    plt.plot(epochs, smoothed(epochs))

    plt.fill_between(epochs, upper_band, lower_band, alpha=0.3, antialiased=True)

    plt.show()
    plt.close()

    cart_pole_ctrl.env.close()
Example #3
def use_model():
    """Load a saved CartPole model and replay it with rendering.

    epsilon=0.0 makes the learner purely greedy (no exploration), so
    every episode shows the learned policy. Loops until interrupted.
    """
    cart_pole_ctrl = Controller(None,
                                'CartPole-v1',
                                StateBuilderCartPole(),
                                communicate=False)
    # cart_pole_ctrl = Controller(None, 'Taxi-v2', None)
    # cart_pole_ctrl = Controller(None, 'LunarLander-v2', state_builder=StateBuilderLunarLander(), communicate=False)

    greedy_learner = QLearner(cart_pole_ctrl.get_action_space(),
                              epsilon=0.0,
                              init_alpha=.5,
                              gamma=.9)
    cart_pole_ctrl.set_learner(greedy_learner)
    cart_pole_ctrl.load("models/CartPole-v1-7.model")

    epoch = 0
    while True:
        cart_pole_ctrl.run(render=True)
        epoch += 1
        print("Epoch {}".format(epoch))
Example #4
def start_reference_aggregated_learner(env_name: str = ''):
    """Launch a background reference agent seeded with the server model.

    The learner starts from a deep copy of QServer.Q so it reflects the
    aggregated (server-side) policy without mutating the shared model.
    The agent runs on a daemon-less thread that is never joined.
    """
    builder = StateBuilderCache.builders.get(env_name, None)

    ctrl = Controller(learner=None,
                      env_id=env_name,
                      state_builder=builder)

    ref_learner = QLearner(ctrl.get_action_space(),
                           epsilon=0.1,
                           init_alpha=.5,
                           gamma=.9,
                           decay_rate=.999)

    # SET MODEL with copy of Server Model
    ref_learner.set_model(copy.deepcopy(QServer.Q))
    ctrl.set_learner(ref_learner)

    threading.Thread(target=ctrl.run).start()
    print('Started Reference Learner')