# MDP-DT "ANY_POINT" read-load experiment: configuration, model setup, and
# the head of the epsilon-greedy training loop.
#
# NOTE(review): this chunk was whitespace-mangled (entire script collapsed onto
# one line) and has been reconstructed into conventional formatting. The
# training loop body is cut off at the end of this chunk; statements after
# `meas = ...` (model update, reward accumulation, split scheduling, ...) are
# not visible here.

training_steps = 5000
eval_steps = 2000
load_period = 250          # period (in steps) of the sinusoidal read load
epsilon = 0.5              # exploration rate for epsilon-greedy action selection
MIN_VMS = 1
MAX_VMS = 20
split_crit = ANY_POINT     # decision-tree split criterion (defined elsewhere in the project)
cons_trans = True          # consider transitions when splitting — TODO confirm semantics
CONF_FILE = TIRAMOLA_DIR + "read_load_5k/any.json"

##############################################################

# "-p" on the command line enables printing/plotting in the original scripts.
PRINT = len(sys.argv) > 1 and sys.argv[1] == "-p"

dm = DecisionMaking.DecisionMaker(TIRAMOLA_DIR + "decisionMaking.json", 'training.data')
scenario = ReadLoadScenario(training_steps, load_period, 10, MIN_VMS, MAX_VMS)
dm.set_state(scenario.get_current_measurements())
model = dm.get_model()
model.set_splitting(split_crit, cons_trans)

total_reward = 0
if TRAIN:
    for time in range(training_steps):
        # Epsilon-greedy: with probability `epsilon` explore a random legal
        # action, otherwise exploit the model's suggestion.
        if random.uniform(0, 1) < epsilon:
            action = random.choice(model.get_legal_actions())
        else:
            action = model.suggest_action()
        reward = scenario.execute_action(action)
        meas = scenario.get_current_measurements()
        # NOTE(review): loop body continues beyond this chunk (not visible).
# MDP-DT experiment: validate the requested split strategy, derive the step at
# which tree-splitting is enabled, then set up one model per test run.
#
# NOTE(review): this chunk begins mid-expression — the visible strategy list is
# the tail of a membership test. The `if split_strategy not in [` head is
# reconstructed from context ("Unknown split strategy!" error, the
# `"start"`/`"half"` checks below) — TODO confirm against the full file.
if split_strategy not in ["start", "start_chain", "half", "half_chain",
                          "end", "always"]:
    # Original used the Python-2 `print` statement; rewritten as a call,
    # which is valid in both Python 2 and 3 for a single argument.
    print("Unknown split strategy!")
    exit()

# Decide at which training step splitting gets enabled:
# "start*" -> immediately, "half*" -> halfway through training,
# otherwise ("end"/"always") -> only after training completes.
if "start" in split_strategy:
    split_step = 0
elif "half" in split_strategy:
    split_step = training_steps // 2
else:
    split_step = training_steps

# "-p" on the command line enables printing; a single run suffices then.
PRINT = len(sys.argv) > 1 and sys.argv[1] == "-p"
if PRINT:
    num_tests = 1

scenario = ReadLoadScenario(training_steps)
conf = ModelConf(CONF_FILE)
assert conf.get_model_type() == MDP_DT, "Wrong model type in MDP-DT example"

# Aggregated per-run statistics.
total_reward_results = []
total_splits_results = []
good_splits_results = []
for i in range(num_tests):
    # Fresh model per test run; splitting starts disabled and is enabled
    # later at `split_step` (beyond this chunk).
    model = MDPDTModel(conf.get_model_conf())
    model.set_state(scenario.get_current_measurements())
    model.set_allow_splitting(False)
    model.set_splitting(split_crit, cons_trans)
    total_reward = 0
    # NOTE(review): the per-run training loop continues beyond this chunk.
# QDT read-load experiment: configuration, per-run setup, and the head of the
# combined training+evaluation loop.
#
# NOTE(review): reconstructed from a whitespace-mangled one-line chunk; the
# inner loop body is cut off after `model.update(...)` — the remaining
# statements (reward accumulation, result collection, ...) are not visible.

CONF_FILE = TIRAMOLA_DIR + "read_load_5k/qdt.json"

##############################################################

# "-p" on the command line enables printing; a single run suffices then.
PRINT = len(sys.argv) > 1 and sys.argv[1] == "-p"
if PRINT:
    num_tests = 1

conf = ModelConf(CONF_FILE)
assert conf.get_model_type() == Q_DT, "Wrong model type in QDT example"

# Aggregated per-run statistics.
total_reward_results = []
total_splits_results = []
good_splits_results = []
for i in range(num_tests):
    # Fresh scenario and model for each test run.
    scenario = ReadLoadScenario(training_steps, load_period, 10, MIN_VMS, MAX_VMS)
    model = QDTModel(conf.get_model_conf())
    model.set_state(scenario.get_current_measurements())
    model.set_allow_splitting(False)
    total_reward = 0
    # Run `training_steps` of epsilon-greedy training followed by
    # `eval_steps` of pure exploitation (no random actions once
    # time >= training_steps).
    for time in range(training_steps + eval_steps):
        if random.uniform(0, 1) < epsilon and time < training_steps:
            action = random.choice(model.get_legal_actions())
        else:
            action = model.suggest_action()
        reward = scenario.execute_action(action)
        meas = scenario.get_current_measurements()
        model.update(action, meas, reward)
        # NOTE(review): loop body continues beyond this chunk (not visible).