import os
import argparse

# Project-local helpers (CONST, logger, init_logger, database, os_quit, Err,
# models, environment) are assumed to be imported elsewhere in this module.


def prepare():
    # Create the log directories on first run.
    if not os.path.exists(CONST.LOG_PATH):
        os.mkdir(CONST.LOG_PATH)
    if not os.path.exists(CONST.LOG_SYSBENCH_PATH):
        os.mkdir(CONST.LOG_SYSBENCH_PATH)

    global opt, task_detail, instance_detail, model_detail

    parser = argparse.ArgumentParser()
    parser.add_argument('--batch_size', type=int, default=2, help='Training Batch Size')
    parser.add_argument('--memory', type=str, default='', help='add replay memory')
    parser.add_argument('--task_id', type=int, required=True, help='get task info')
    parser.add_argument('--inst_id', type=int, required=True, help='get inst info')
    parser.add_argument('--model_id', type=int, required=True, help='get model info')
    parser.add_argument('--host', type=str, required=True, help='cluster host for set mysql param')
    opt = parser.parse_args()

    task_id = opt.task_id
    inst_id = opt.inst_id
    model_id = opt.model_id

    init_logger(task_id, False, True)
    CONST.TASK_ID = task_id
    logger.info("start cdbtune")
    logger.info(opt)

    # Look up the task, instance, and model rows in the metadata database;
    # abort early if any of the three ids is unknown.
    # taskdb = database("127.0.0.1", 3306, "root", "123456", "cdbtune")
    taskdb = database("10.249.50.200", 4839, "cdbtune", "123456", "cdbtune")
    rsp_task = taskdb.fetch_all("select * from tb_task where task_id = %d" % task_id)
    rsp_inst = taskdb.fetch_all("select * from tb_mysql_inst where inst_id = %d" % inst_id)
    rsp_model = taskdb.fetch_all("select * from tb_models where model_id = %d" % model_id)
    if len(rsp_task) == 0 or len(rsp_inst) == 0 or len(rsp_model) == 0:
        os_quit(Err.INPUT_ERROR, "task_id or inst_id or model_id does not exist")

    task_detail = rsp_task[0]
    instance_detail = rsp_inst[0]
    model_detail = rsp_model[0]

    method = model_detail["method"]
    model_path = model_detail["position"]
    num_knobs = model_detail["knobs"]
    num_metrics = model_detail["dimension"]

    env = environment.TencentServer(instance=instance_detail,
                                    task_detail=task_detail,
                                    model_detail=model_detail,
                                    host=opt.host)

    # Build models
    if method == 'ddpg':
        ddpg_opt = dict()
        ddpg_opt['tau'] = 0.001
        ddpg_opt['alr'] = 0.00001
        ddpg_opt['clr'] = 0.00001
        ddpg_opt['model'] = model_path
        ddpg_opt['gamma'] = 0.99
        ddpg_opt['batch_size'] = opt.batch_size
        ddpg_opt['memory_size'] = 100000
        model = models.DDPG(n_states=num_metrics,
                            n_actions=num_knobs,
                            opt=ddpg_opt,
                            ouprocess=True)
    else:
        model = models.DQN()

    # Optionally warm-start the replay memory from a previous run.
    if len(opt.memory) > 0:
        model.replay_memory.load_memory(opt.memory)
        logger.info("Load Memory: {}".format(len(model.replay_memory)))

    # Load mean value and variance
    current_knob = environment.get_init_knobs()

    return env, model
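# A minimal sketch (not part of the original source) of how prepare() is
# typically driven from the command line; the id values, script name, and
# host below are placeholders only:
#
#   python tuner.py --task_id 1 --inst_id 1 --model_id 1 --host 10.0.0.1
#
# which inside an entry point would reduce to:
#
#   if __name__ == '__main__':
#       env, model = prepare()
#       # ... hand env and model to the tuning loop ...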
if not os.path.exists('test_knob'):
    os.mkdir('test_knob')

expr_name = 'eval_{}_{}'.format(opt.method, str(utils.get_timestamp()))

logger = utils.Logger(
    name=opt.method,
    log_file='log/{}.log'.format(expr_name)
)

if opt.other_knob != 0:
    logger.warn('USE Other Knobs')

# Load mean value and variance
current_knob = environment.get_init_knobs()


def compute_percentage(default, current):
    """Compute metric changes as percentages relative to the default settings.

    Args:
        default: (tps, latency) measured under the default settings
        current: (tps, latency) measured under the current settings

    Returns:
        (delta_tps, delta_latency), where positive values mean improvement:
        higher throughput and lower latency than the defaults.
    """
    delta_tps = 100 * (current[0] - default[0]) / default[0]
    delta_latency = 100 * (-current[1] + default[1]) / default[1]
    return delta_tps, delta_latency


def generate_knob(action, method):
    # Map a raw model action to concrete knob values; DDPG emits a
    # continuous action vector.
    if method == 'ddpg':
        return environment.gen_continuous(action)
    else:
        raise NotImplementedError('Unsupported method: {}'.format(method))
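# Illustrative check of compute_percentage (numbers are made up): with
# default = (tps=100.0, latency=10.0) and current = (tps=120.0, latency=8.0),
# throughput rose 20% and latency dropped 20%:
#
#   >>> compute_percentage((100.0, 10.0), (120.0, 8.0))
#   (20.0, 20.0)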
            # Held-out evaluation: run the actor on the test batch without
            # updating its weights (is_train=False) and accumulate the loss.
            batch_actions = [x[1].tolist() for x in batch_data]
            test_loss += model.train_actor((batch_states, batch_actions), is_train=False)

        print("[Epoch {}] Test Loss: {}".format(epoch, test_loss))
        model.save_actor('sl_model_params/sl_train_actor_{}.pth'.format(epoch))
else:
    # Create Environment
    if opt.tencent:
        env = environment.TencentServer(wk_type=opt.workload,
                                        instance_name=opt.instance,
                                        request_url=tuner_configs.TENCENT_URL)
    else:
        env = environment.DockerServer(wk_type=opt.workload,
                                       instance_name=opt.instance)

    current_knob = environment.get_init_knobs()

    expr_name = 'sl_test_ddpg_{}'.format(str(utils.get_timestamp()))
    logger = utils.Logger(
        name='train_supervised',
        log_file='log/{}.log'.format(expr_name)
    )

    assert len(opt.params) != 0, "Please add params' path"

    def generate_knob(action):
        return environment.gen_continuous(action)

    # Restore the supervised-trained actor weights before evaluation.
    model.load_actor(opt.params)
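# Hypothetical continuation (not in the original fragment) showing how the
# loaded actor would be exercised: fetch a state from the environment,
# predict an action, translate it into knob values, and apply them. The
# environment and model method names here (initialize/predict/step) are
# assumptions about this repo's API, not confirmed calls.
#
#   state = env.initialize()
#   action = model.predict(state)
#   knobs = generate_knob(action)
#   env.step(knobs)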