def configuration_recommendation_ddpg(result_info):  # pylint: disable=invalid-name
    """Recommend a knob configuration for the newest result using DDPG.

    Args:
        result_info: mapping holding the primary key of the result to tune
            under the key ``'newest_result_id'``.

    Returns:
        dict: ``'status'`` (always ``'good'``), ``'result_id'``,
        ``'recommendation'`` (knob label -> recommended value) and
        ``'info'``.
    """
    LOG.info('Use ddpg to recommend configuration')
    newest_id = result_info['newest_result_id']
    newest_results = Result.objects.filter(pk=newest_id)
    session = Result.objects.get(pk=newest_id).session
    aggregated = DataUtil.aggregate_data(newest_results)

    # Flatten the metrics into a single state vector and min-max scale it.
    # NOTE(review): the scaler is fitted on one row only, so each feature's
    # min equals its max; this looks like it maps every metric to 0 --
    # confirm that this is intended.
    metric_vector = aggregated['y_matrix'].flatten()
    metric_row = metric_vector.reshape(1, -1)
    state = MinMaxScaler().fit(metric_row).transform(metric_row)[0]

    # Only the label part (index 1) of the cleaned knob data is needed here.
    cleaned = clean_knob_data(aggregated['X_matrix'],
                              aggregated['X_columnlabels'],
                              session)
    labels = np.array(cleaned[1]).flatten()

    agent = DDPG(n_actions=len(labels), n_states=len(metric_vector))
    # Restore previously stored networks / replay memory when the session
    # has them.
    has_actor = session.ddpg_actor_model is not None
    has_critic = session.ddpg_critic_model is not None
    if has_actor and has_critic:
        agent.set_model(session.ddpg_actor_model, session.ddpg_critic_model)
    if session.ddpg_reply_memory is not None:
        agent.replay_memory.set(session.ddpg_reply_memory)

    # Map the chosen action back onto the knob ranges via the knob bounds.
    action = agent.choose_action(state)
    bounds = np.vstack(DataUtil.get_knob_bounds(labels, session))
    bounds_scaler = MinMaxScaler().fit(bounds)
    knob_values = bounds_scaler.inverse_transform(action.reshape(1, -1))[0]

    recommendation = {
        label: knob_values[pos] for pos, label in enumerate(labels)
    }
    return {
        'status': 'good',
        'result_id': newest_id,
        'recommendation': recommendation,
        'info': 'INFO: ddpg',
    }
def configuration_recommendation_ddpg(result_info):  # pylint: disable=invalid-name
    """Recommend a knob configuration for the newest result using DDPG.

    The flattened (unscaled) metric vector of the result is used as the
    state handed to the DDPG agent.

    Args:
        result_info: mapping holding the primary key of the result to tune
            under the key ``'newest_result_id'``.

    Returns:
        dict: ``'status'`` (always ``'good'``), ``'result_id'``,
        ``'recommendation'`` (knob label -> recommended value) and
        ``'info'``.
    """
    LOG.info('Use ddpg to recommend configuration')
    newest_id = result_info['newest_result_id']
    newest_results = Result.objects.filter(pk=newest_id)
    session = Result.objects.get(pk=newest_id).session
    aggregated = DataUtil.aggregate_data(newest_results)
    state = aggregated['y_matrix'].flatten()

    # Only the label part (index 1) of the cleaned knob data is needed here.
    cleaned = clean_knob_data(aggregated['X_matrix'],
                              aggregated['X_columnlabels'],
                              session)
    labels = np.array(cleaned[1]).flatten()

    agent = DDPG(
        n_actions=len(labels),
        n_states=len(state),
        alr=ACTOR_LEARNING_RATE,
        clr=CRITIC_LEARNING_RATE,
        gamma=GAMMA,
        batch_size=DDPG_BATCH_SIZE,
        tau=TAU,
    )
    # Restore previously stored networks / replay memory when the session
    # has them.
    has_actor = session.ddpg_actor_model is not None
    has_critic = session.ddpg_critic_model is not None
    if has_actor and has_critic:
        agent.set_model(session.ddpg_actor_model, session.ddpg_critic_model)
    if session.ddpg_reply_memory is not None:
        agent.replay_memory.set(session.ddpg_reply_memory)

    action = agent.choose_action(state)
    LOG.info('recommended knob: %s', action)

    # Map the chosen action back onto the knob ranges via the knob bounds.
    bounds = np.vstack(DataUtil.get_knob_bounds(labels, session))
    bounds_scaler = MinMaxScaler().fit(bounds)
    knob_values = bounds_scaler.inverse_transform(action.reshape(1, -1))[0]

    recommendation = {
        label: knob_values[pos] for pos, label in enumerate(labels)
    }
    response = {
        'status': 'good',
        'result_id': newest_id,
        'recommendation': recommendation,
        'info': 'INFO: ddpg',
    }
    # Log every recommended knob value, one line per label.
    for label in labels:
        LOG.info('%s: %f', label, recommendation[label])
    return response
def configuration_recommendation_ddpg(result_info):  # pylint: disable=invalid-name
    """Recommend a knob configuration for the newest result using DDPG.

    Network sizes and the ``use_default`` flag are read from the session's
    JSON-encoded hyperparameters. The recommendation is stored through
    ``create_and_save_recommendation`` and the elapsed time is recorded with
    ``save_execution_time``.

    Args:
        result_info: mapping holding the primary key of the result to tune
            under the key ``'newest_result_id'``.

    Returns:
        Whatever ``create_and_save_recommendation`` returns for the
        recommended knob mapping.
    """
    started_at = time.time()
    LOG.info('Use ddpg to recommend configuration')
    newest_id = result_info['newest_result_id']
    newest_results = Result.objects.filter(pk=newest_id)
    newest = newest_results.first()
    session = newest.session
    hyperparams = JSONUtil.loads(session.hyperparameters)
    aggregated = DataUtil.aggregate_data(newest_results)

    # Clean the metrics, flatten them into one state vector and scale it.
    # NOTE(review): the scaler is fitted on one row only, so each feature's
    # min equals its max; this looks like it maps every metric to 0 --
    # confirm that this is intended.
    cleaned_metrics, _ = clean_metric_data(aggregated['y_matrix'],
                                           aggregated['y_columnlabels'],
                                           session)
    metric_vector = cleaned_metrics.flatten()
    metric_row = metric_vector.reshape(1, -1)
    state = MinMaxScaler().fit(metric_row).transform(metric_row)[0]

    # Only the label part (index 1) of the cleaned knob data is needed here.
    cleaned_knobs = clean_knob_data(aggregated['X_matrix'],
                                    aggregated['X_columnlabels'],
                                    session)
    labels = np.array(cleaned_knobs[1]).flatten()

    agent = DDPG(
        n_actions=len(labels),
        n_states=len(metric_vector),
        a_hidden_sizes=hyperparams['DDPG_ACTOR_HIDDEN_SIZES'],
        c_hidden_sizes=hyperparams['DDPG_CRITIC_HIDDEN_SIZES'],
        use_default=hyperparams['DDPG_USE_DEFAULT'],
    )
    # Restore previously stored networks / replay memory when the session
    # has them.
    has_actor = session.ddpg_actor_model is not None
    has_critic = session.ddpg_critic_model is not None
    if has_actor and has_critic:
        agent.set_model(session.ddpg_actor_model, session.ddpg_critic_model)
    if session.ddpg_reply_memory is not None:
        agent.replay_memory.set(session.ddpg_reply_memory)

    # Map the chosen action back onto the knob ranges via the knob bounds.
    action = agent.choose_action(state)
    bounds = np.vstack(DataUtil.get_knob_bounds(labels, session))
    bounds_scaler = MinMaxScaler().fit(bounds)
    knob_values = bounds_scaler.inverse_transform(action.reshape(1, -1))[0]

    recommendation = {
        label: knob_values[pos] for pos, label in enumerate(labels)
    }
    response = create_and_save_recommendation(
        recommended_knobs=recommendation,
        result=newest,
        status='good',
        info='INFO: ddpg',
    )
    save_execution_time(started_at, "configuration_recommendation_ddpg", newest)
    return response
def train_ddpg(result_id):
    """Add the given result as a training sample to DDPG and train it.

    The reward is the target-objective value of this result divided by the
    target-objective value of the first earlier result of the same session,
    negated when the metric metadata says ``'(less is better)'``. The updated
    actor/critic models and replay memory are written back to the session.

    Args:
        result_id: primary key of the ``Result`` to train on.

    Returns:
        dict: ``{'newest_result_id': result_id}``. It is returned without
        any training when the session has no earlier result.

    Raises:
        Exception: if the target objective is missing from, or occurs more
            than once in, the aggregated metric labels.
    """
    LOG.info('Add training data to ddpg and train ddpg')
    # Fetch the row once and reuse it; the instance and the single-row
    # queryset get distinct names instead of sharing ``result``.
    result = Result.objects.get(pk=result_id)
    session = result.session
    session_results = Result.objects.filter(
        session=session, creation_time__lt=result.creation_time)
    result_info = {'newest_result_id': result_id}
    if not session_results:
        LOG.info('No previous result. Abort.')
        return result_info

    # Extract data from result
    result_queryset = Result.objects.filter(pk=result_id)
    base_result = Result.objects.filter(pk=session_results[0].pk)
    agg_data = DataUtil.aggregate_data(result_queryset)
    metric_data = agg_data['y_matrix'].flatten()
    base_metric_data = (
        DataUtil.aggregate_data(base_result))['y_matrix'].flatten()
    # NOTE(review): the scaler is fitted on one row only, so each feature's
    # min equals its max; this looks like it maps every metric to 0 --
    # confirm that this is intended.
    metric_row = metric_data.reshape(1, -1)
    normalized_metric_data = MinMaxScaler().fit(metric_row).transform(
        metric_row)[0]

    # Clean knob data and scale it with the knob bounds
    cleaned_knob_data = clean_knob_data(agg_data['X_matrix'],
                                        agg_data['X_columnlabels'],
                                        session)
    knob_data = np.array(cleaned_knob_data[0])
    knob_labels = np.array(cleaned_knob_data[1])
    knob_bounds = np.vstack(
        DataUtil.get_knob_bounds(knob_labels.flatten(), session))
    knob_data = MinMaxScaler().fit(knob_bounds).transform(knob_data)[0]
    knob_num = len(knob_data)
    metric_num = len(metric_data)
    LOG.info('knob_num: %d, metric_num: %d', knob_num, metric_num)

    # Filter ys by current target objective metric
    target_objective = session.target_objective
    target_obj_idx = [
        i for i, n in enumerate(agg_data['y_columnlabels'])
        if n == target_objective
    ]
    if not target_obj_idx:
        raise Exception(('Could not find target objective in metrics '
                         '(target_obj={})').format(target_objective))
    if len(target_obj_idx) > 1:
        raise Exception(
            ('Found {} instances of target objective in '
             'metrics (target_obj={})').format(len(target_obj_idx),
                                               target_objective))
    # Indexing with the one-element list keeps these as length-1 arrays.
    objective = metric_data[target_obj_idx]
    base_objective = base_metric_data[target_obj_idx]
    metric_meta = db.target_objectives.get_metric_metadata(
        session.dbms.pk, target_objective)

    # Calculate the reward
    objective = objective / base_objective
    if metric_meta[target_objective].improvement == '(less is better)':
        reward = -objective
    else:
        reward = objective
    LOG.info('reward: %f', reward)

    # Update ddpg
    ddpg = DDPG(n_actions=knob_num, n_states=metric_num,
                alr=ACTOR_LEARNING_RATE, clr=CRITIC_LEARNING_RATE,
                gamma=0, batch_size=DDPG_BATCH_SIZE)
    if session.ddpg_actor_model and session.ddpg_critic_model:
        ddpg.set_model(session.ddpg_actor_model, session.ddpg_critic_model)
    if session.ddpg_reply_memory:
        ddpg.replay_memory.set(session.ddpg_reply_memory)
    # The same metric vector is passed as both the first and last argument.
    ddpg.add_sample(normalized_metric_data, knob_data, reward,
                    normalized_metric_data)
    for _ in range(25):
        ddpg.update()

    # Persist the trained networks and replay memory on the session.
    session.ddpg_actor_model, session.ddpg_critic_model = ddpg.get_model()
    session.ddpg_reply_memory = ddpg.replay_memory.get()
    session.save()
    return result_info
def train_ddpg(result_id):
    """Add the given result as a training sample to DDPG and train it.

    Hyperparameters are read from the session's JSON-encoded
    ``hyperparameters``. The reward compares the target objective of this
    result with the first earlier result of the session (base) and the last
    earlier result (previous); when the session has no earlier result, the
    result itself is used for both. The updated actor/critic models and
    replay memory are written back to the session.

    Args:
        result_id: primary key of the ``Result`` to train on.

    Returns:
        dict: ``{'newest_result_id': result_id}``.

    Raises:
        Exception: if the target objective is missing from, or occurs more
            than once in, the cleaned metric labels.
    """
    LOG.info('Add training data to ddpg and train ddpg')
    # Fetch the row once and reuse it; the instance and the single-row
    # queryset get distinct names instead of sharing ``result``.
    result = Result.objects.get(pk=result_id)
    session = result.session
    params = JSONUtil.loads(session.hyperparameters)
    session_results = Result.objects.filter(
        session=session, creation_time__lt=result.creation_time)
    result_info = {'newest_result_id': result_id}

    # Extract data from result and previous results
    result_queryset = Result.objects.filter(pk=result_id)
    earlier_results = list(session_results)
    if earlier_results:
        base_result_id = earlier_results[0].pk
        prev_result_id = earlier_results[-1].pk
    else:
        base_result_id = result_id
        prev_result_id = result_id
    base_result = Result.objects.filter(pk=base_result_id)
    prev_result = Result.objects.filter(pk=prev_result_id)

    agg_data = DataUtil.aggregate_data(result_queryset)
    base_metric_data = (
        DataUtil.aggregate_data(base_result))['y_matrix'].flatten()
    prev_metric_data = (
        DataUtil.aggregate_data(prev_result))['y_matrix'].flatten()

    target_objective = session.target_objective
    # Position of the target objective among the *uncleaned* metric labels;
    # used to index the base/previous metric vectors, which are not cleaned.
    prev_obj_idx = [
        i for i, n in enumerate(agg_data['y_columnlabels'])
        if n == target_objective
    ]

    # Clean metric data
    metric_data, metric_labels = clean_metric_data(
        agg_data['y_matrix'], agg_data['y_columnlabels'], session)
    metric_data = metric_data.flatten()
    # NOTE(review): the scaler is fitted on one row only, so each feature's
    # min equals its max; this looks like it maps every metric to 0 --
    # confirm that this is intended.
    metric_row = metric_data.reshape(1, -1)
    normalized_metric_data = MinMaxScaler().fit(metric_row).transform(
        metric_row)[0]

    # Clean knob data and scale it with the knob bounds
    cleaned_knob_data = clean_knob_data(agg_data['X_matrix'],
                                        agg_data['X_columnlabels'],
                                        session)
    knob_data = np.array(cleaned_knob_data[0])
    knob_labels = np.array(cleaned_knob_data[1])
    knob_bounds = np.vstack(
        DataUtil.get_knob_bounds(knob_labels.flatten(), session))
    knob_data = MinMaxScaler().fit(knob_bounds).transform(knob_data)[0]
    knob_num = len(knob_data)
    metric_num = len(metric_data)
    LOG.info('knob_num: %d, metric_num: %d', knob_num, metric_num)

    # Filter ys by current target objective metric
    target_obj_idx = [
        i for i, n in enumerate(metric_labels) if n == target_objective
    ]
    if not target_obj_idx:
        raise Exception(('Could not find target objective in metrics '
                         '(target_obj={})').format(target_objective))
    if len(target_obj_idx) > 1:
        raise Exception(
            ('Found {} instances of target objective in '
             'metrics (target_obj={})').format(len(target_obj_idx),
                                               target_objective))
    # Indexing with index lists keeps these as arrays rather than scalars.
    objective = metric_data[target_obj_idx]
    base_objective = base_metric_data[prev_obj_idx]
    prev_objective = prev_metric_data[prev_obj_idx]
    metric_meta = db.target_objectives.get_metric_metadata(
        session.dbms.pk, target_objective)
    less_is_better = (
        metric_meta[target_objective].improvement == '(less is better)')

    # Calculate the reward
    if params['DDPG_SIMPLE_REWARD']:
        # Simple reward: ratio to the base objective, sign set by direction.
        objective = objective / base_objective
        reward = -objective if less_is_better else objective
    elif less_is_better:
        if objective - base_objective <= 0:  # positive reward
            reward = (
                (np.square((2 * base_objective - objective) / base_objective) - 1)
                * abs(2 * prev_objective - objective) / prev_objective)
        else:  # negative reward
            reward = (-(np.square(objective / base_objective) - 1)
                      * objective / prev_objective)
    else:
        if objective - base_objective > 0:  # positive reward
            reward = ((np.square(objective / base_objective) - 1)
                      * objective / prev_objective)
        else:  # negative reward
            reward = (
                -(np.square((2 * base_objective - objective) / base_objective) - 1)
                * abs(2 * prev_objective - objective) / prev_objective)
    LOG.info('reward: %f', reward)

    # Update ddpg
    ddpg = DDPG(n_actions=knob_num, n_states=metric_num,
                alr=params['DDPG_ACTOR_LEARNING_RATE'],
                clr=params['DDPG_CRITIC_LEARNING_RATE'],
                gamma=params['DDPG_GAMMA'],
                batch_size=params['DDPG_BATCH_SIZE'],
                a_hidden_sizes=params['DDPG_ACTOR_HIDDEN_SIZES'],
                c_hidden_sizes=params['DDPG_CRITIC_HIDDEN_SIZES'],
                use_default=params['DDPG_USE_DEFAULT'])
    if session.ddpg_actor_model and session.ddpg_critic_model:
        ddpg.set_model(session.ddpg_actor_model, session.ddpg_critic_model)
    if session.ddpg_reply_memory:
        ddpg.replay_memory.set(session.ddpg_reply_memory)
    # The same metric vector is passed as both the first and last argument.
    ddpg.add_sample(normalized_metric_data, knob_data, reward,
                    normalized_metric_data)
    for _ in range(params['DDPG_UPDATE_EPOCHS']):
        ddpg.update()

    # Persist the trained networks and replay memory on the session.
    session.ddpg_actor_model, session.ddpg_critic_model = ddpg.get_model()
    session.ddpg_reply_memory = ddpg.replay_memory.get()
    session.save()
    return result_info