Beispiel #1
0
else:
    split_step = training_steps

# Print mode is requested by passing "-p" as the first command-line argument.
# Slicing keeps the comparison safe when no argument was supplied at all.
PRINT = sys.argv[1:2] == ["-p"]
if PRINT:
    # In print mode only a single test run is performed.
    num_tests = 1

# Load the model configuration and verify it matches this example's model type.
conf = ModelConf(CONF_FILE)
assert conf.get_model_type() == MDP_DT, "Wrong model type in MDP-DT example"

# Result lists for the experiment (presumably one entry appended per test run
# further down in the script -- confirm against the rest of the file).
total_reward_results = []
total_splits_results = []
good_splits_results = []

for i in range(num_tests):

    scenario = ComplexScenario(training_steps, load_period, 10, MIN_VMS, MAX_VMS)
    model = MDPDTModel(conf.get_model_conf())
    model.set_state(scenario.get_current_measurements())
    model.set_allow_splitting(False)
    model.set_splitting(split_crit, cons_trans)

    total_reward = 0
    for time in range(training_steps + eval_steps):
    
        if random.uniform(0, 1) < epsilon and time < training_steps:
            action = random.choice(model.get_legal_actions())
        else:
            action = model.suggest_action()

        reward = scenario.execute_action(action)
        meas   = scenario.get_current_measurements()