Example 1
def main():
    # --------------preparation--------------------
    rst_path, sim_path = generatePath(current_time)  # Create a new folder for the experiment
    RL = QLearningTable(list(range(len(green_states))))  # Initialize the Q-learning framework
    feed_path = '{}/results/{}/qtable.csv'.format(WORKSPACE, 'p5i3g0')
    RL.feedQTable(feed_path)  # Helpful when inheriting from a previously trained agent
    # ---------------training--------------------
    trainAgent(RL, rst_path, sim_path)
    # --------------testing--------------------
    RL.epsilon = 1  # Epsilon-greedy no longer selects random actions
    fixed = testAgent('fixed', RL)
    rl = testAgent('rl', RL)
    actuated = testAgent('actuated', RL)
    plotTestResult(rl, fixed, actuated, sim_path)
    flow_scenarios = ['-50%', '-25%', '0%', '+25%', '+50%']
    pushAgent(flow_scenarios, sim_path, RL)  # Explore the limits of the trained agent
    # --------------results----------------------
    RL.saveQTable('{}/qtable.csv'.format(sim_path))
    RL.plotCumulativeReward(sim_path)  # Plot the cumulative reward
    RL_params = {
        'lr': RL.alpha,
        'gamma': RL.gamma,
        'e_max': RL.e_greedy_max,
        'e_inc': RL.e_greedy_increment
    }
    writeLog(RL_params, rst_path, sim_path, clean=True)  # Record basic information about the experiment
    # --------------end--------------------
    print('\nALL DONE, check {}'.format(str(current_time)))
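The examples assume a QLearningTable class (imported from RL_brain) that exposes feedQTable, saveQTable, chooseAction, an epsilon attribute, and the alpha/gamma/e_greedy_* hyperparameters read into RL_params above. The snippet below is only a minimal sketch of what such a tabular Q-learning class could look like, reconstructed from how it is used here; the pandas-based layout, the learn method, and the meaning of epsilon as "probability of acting greedily" are assumptions, not the actual RL_brain implementation (plotting helpers such as plotCumulativeReward are omitted).

import numpy as np
import pandas as pd


class QLearningTable:
    """Minimal sketch of the interface used above; not the real RL_brain class."""

    def __init__(self, actions, alpha=0.01, gamma=0.9,
                 e_greedy_max=0.9, e_greedy_increment=0.001):
        self.actions = actions                    # list of action indices
        self.alpha, self.gamma = alpha, gamma
        self.e_greedy_max = e_greedy_max
        self.e_greedy_increment = e_greedy_increment
        self.epsilon = 0                          # probability of greedy action; 1 = pure exploitation
        self.q_table = pd.DataFrame(columns=actions, dtype=np.float64)

    def _ensure_state(self, s):
        # Lazily add unseen states with zero-initialized action values
        if s not in self.q_table.index:
            self.q_table.loc[s] = [0.0] * len(self.actions)

    def chooseAction(self, s, o=None):
        # o is the extra context argument seen in Example 3; unused in this sketch
        self._ensure_state(s)
        if np.random.uniform() < self.epsilon:    # exploit: pick a greedy action
            row = self.q_table.loc[s]
            return np.random.choice(row[row == row.max()].index)
        return np.random.choice(self.actions)     # explore: pick a random action

    def learn(self, s, a, r, s_):
        # Standard one-step Q-learning update
        self._ensure_state(s)
        self._ensure_state(s_)
        q_predict = self.q_table.loc[s, a]
        q_target = r + self.gamma * self.q_table.loc[s_].max()
        self.q_table.loc[s, a] += self.alpha * (q_target - q_predict)

    def feedQTable(self, path):
        self.q_table = pd.read_csv(path, index_col=0)
        self.q_table.columns = self.q_table.columns.astype(int)

    def saveQTable(self, path):
        self.q_table.to_csv(path)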
Example 2
def main():
    trained_number = getLastExperiment('p5i3g0')
    RL = QLearningTable(list(range(len(green_states))))
    trained_path = '{}/results/{}/'.format(WORKSPACE, trained_number)
    qtable_path = trained_path + 'qtable.csv'
    RL.feedQTable(qtable_path)
    RL.epsilon = 1
    fixed = testAgent('fixed', RL)
    rl = testAgent('rl', RL)
    actuated = testAgent('actuated', RL)
    plotTestResult(rl, fixed, actuated, trained_path)
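getLastExperiment is not shown in these examples. Below is only a plausible minimal version, assuming experiment folders live under WORKSPACE/results/ and sort chronologically by name; how the real helper interprets its argument is unknown, so here it is treated as a name prefix with the literal name as a fallback.

import os

from global_var import WORKSPACE


def getLastExperiment(name):
    # Sketch only: return the most recent experiment folder under results/
    # whose name starts with `name`, falling back to `name` itself.
    results_dir = os.path.join(WORKSPACE, 'results')
    runs = sorted(d for d in os.listdir(results_dir)
                  if os.path.isdir(os.path.join(results_dir, d)) and d.startswith(name))
    return runs[-1] if runs else name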
Example 3
            if env.isCheckpoint():
                o, s = env.getCurrentOccasion()  # query the current situation from the environment
                a = RL.chooseAction(s, o)        # let the trained agent pick an action
                env.prolongTL(a)                 # prolong the traffic light phase accordingly

        if step % VERIFY_INTERVAL == 0:
            env.calWaitingTime()  # periodically record the waiting time
        step += 1
        env.conn.simulationStep()  # advance the simulation by one step

    env.conn.close()  # close the simulation connection
    sys.stdout.flush()

    # Report the average waiting time and the number of samples
    print(round(np.mean(env.waitingtime), 2), len(env.waitingtime))
    return env.waitingtime


if __name__ == '__main__':
    from RL_brain import QLearningTable
    from global_var import green_states, WORKSPACE

    trained_path = '{}/results/{}/'.format(WORKSPACE, 'p5i3g0')
    qtable_path = trained_path + 'qtable.csv'
    RL = QLearningTable(list(range(len(green_states))))
    RL.feedQTable(qtable_path)
    RL.epsilon = 1
    fixed = testAgent('fixed', RL)
    rl = testAgent('rl', RL)
    actuated = testAgent('actuated', RL)
    plotTestResult(rl, fixed, actuated, trained_path)
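Example 3 begins in the middle of testAgent, so its signature and setup are not part of the snippet. The skeleton below is only a guess at how the fragment fits into the full function: Env, SIM_STEPS and the mode check are assumptions and do not come from the original code.

def testAgent(mode, RL):
    # Skeleton only: Env and SIM_STEPS are assumed names, not from the original code.
    env = Env(mode)        # assumed wrapper that starts the simulation under the given control mode
    step = 0
    while step < SIM_STEPS:
        if mode == 'rl':   # assumption: only the RL mode consults the agent
            if env.isCheckpoint():
                o, s = env.getCurrentOccasion()
                a = RL.chooseAction(s, o)
                env.prolongTL(a)

        if step % VERIFY_INTERVAL == 0:
            env.calWaitingTime()
        step += 1
        env.conn.simulationStep()

    env.conn.close()
    return env.waitingtime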