else:
    # Tail of an if-statement whose condition is outside this view: in this
    # branch the split step is set to the number of training steps.
    split_step = training_steps

# Print mode is requested by passing "-p" as the first command-line argument.
PRINT = len(sys.argv) > 1 and sys.argv[1] == "-p"
if PRINT:
    # In print mode only a single test run is executed.
    num_tests = 1

conf = ModelConf(CONF_FILE)
# Guard against running this example with a configuration for another model.
assert conf.get_model_type() == MDP_DT, "Wrong model type in MDP-DT example"

# Result accumulators; they are filled by code outside this view
# (presumably one entry per test run -- TODO confirm).
total_reward_results = []
total_splits_results = []
good_splits_results = []

for i in range(num_tests):
    # A fresh scenario and model are built for every test run.
    # NOTE(review): the meaning of the literal 10 passed to ComplexScenario
    # is not visible from here -- confirm against its constructor.
    scenario = ComplexScenario(training_steps, load_period, 10, MIN_VMS, MAX_VMS)
    model = MDPDTModel(conf.get_model_conf())
    model.set_state(scenario.get_current_measurements())
    # Splitting is disabled at the start of the run; presumably it is enabled
    # later (e.g. at split_step) by code outside this view -- TODO confirm.
    model.set_allow_splitting(False)
    model.set_splitting(split_crit, cons_trans)

    total_reward = 0
    # Iterates over the training steps followed by the evaluation steps.
    for time in range(training_steps + eval_steps):
        # During training steps only, a random legal action is chosen when a
        # uniform draw from [0, 1] falls below epsilon; otherwise (and always
        # during evaluation steps) the model's suggested action is used.
        # The random draw comes first in the condition, so it is evaluated on
        # every step, including evaluation steps.
        if random.uniform(0, 1) < epsilon and time < training_steps:
            action = random.choice(model.get_legal_actions())
        else:
            action = model.suggest_action()

        # Apply the action to the scenario, then read the measurements that
        # result from it.
        reward = scenario.execute_action(action)
        meas = scenario.get_current_measurements()