def configuration_recommendation_ddpg(result_info):  # pylint: disable=invalid-name
    LOG.info('Use ddpg to recommend configuration')
    result_id = result_info['newest_result_id']
    result = Result.objects.filter(pk=result_id)
    session = Result.objects.get(pk=result_id).session
    agg_data = DataUtil.aggregate_data(result)
    metric_data = agg_data['y_matrix'].flatten()
    metric_scalar = MinMaxScaler().fit(metric_data.reshape(1, -1))
    normalized_metric_data = metric_scalar.transform(metric_data.reshape(1, -1))[0]
    cleaned_knob_data = clean_knob_data(agg_data['X_matrix'],
                                        agg_data['X_columnlabels'], session)
    knob_labels = np.array(cleaned_knob_data[1]).flatten()
    knob_num = len(knob_labels)
    metric_num = len(metric_data)

    # Restore the agent trained so far for this session, then ask it for an action.
    ddpg = DDPG(n_actions=knob_num, n_states=metric_num)
    if session.ddpg_actor_model is not None and session.ddpg_critic_model is not None:
        ddpg.set_model(session.ddpg_actor_model, session.ddpg_critic_model)
    if session.ddpg_reply_memory is not None:
        ddpg.replay_memory.set(session.ddpg_reply_memory)
    knob_data = ddpg.choose_action(normalized_metric_data)

    # Map the actor's [0, 1] output back onto each knob's real value range.
    knob_bounds = np.vstack(DataUtil.get_knob_bounds(knob_labels, session))
    knob_data = MinMaxScaler().fit(knob_bounds).inverse_transform(knob_data.reshape(1, -1))[0]
    conf_map = {k: knob_data[i] for i, k in enumerate(knob_labels)}

    conf_map_res = {}
    conf_map_res['status'] = 'good'
    conf_map_res['result_id'] = result_id
    conf_map_res['recommendation'] = conf_map
    conf_map_res['info'] = 'INFO: ddpg'
    return conf_map_res
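
# A minimal, self-contained sketch (all values made up for illustration) of the
# scaling round trip used above: the actor emits one value in [0, 1] per knob,
# and fitting a MinMaxScaler on the stacked [min; max] knob bounds lets
# inverse_transform() map each component back onto its knob's native range.
import numpy as np
from sklearn.preprocessing import MinMaxScaler

# Hypothetical bounds for two knobs: first row is the mins, second row the maxes.
knob_bounds = np.vstack([[64.0, 0.0], [4096.0, 1.0]])
action = np.array([0.25, 0.8])  # hypothetical DDPG actor output, one entry per knob

knob_values = MinMaxScaler().fit(knob_bounds).inverse_transform(action.reshape(1, -1))[0]
# 0.25 of the way from 64 to 4096 -> 1072.0; 0.8 of the way from 0 to 1 -> 0.8
print(knob_values)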
def configuration_recommendation_ddpg(result_info):  # pylint: disable=invalid-name
    LOG.info('Use ddpg to recommend configuration')
    result_id = result_info['newest_result_id']
    result = Result.objects.filter(pk=result_id)
    session = Result.objects.get(pk=result_id).session
    agg_data = DataUtil.aggregate_data(result)
    metric_data = agg_data['y_matrix'].flatten()
    cleaned_agg_data = clean_knob_data(agg_data['X_matrix'],
                                       agg_data['X_columnlabels'], session)
    knob_labels = np.array(cleaned_agg_data[1]).flatten()
    knob_num = len(knob_labels)
    metric_num = len(metric_data)

    ddpg = DDPG(n_actions=knob_num, n_states=metric_num, alr=ACTOR_LEARNING_RATE,
                clr=CRITIC_LEARNING_RATE, gamma=GAMMA, batch_size=DDPG_BATCH_SIZE, tau=TAU)
    if session.ddpg_actor_model is not None and session.ddpg_critic_model is not None:
        ddpg.set_model(session.ddpg_actor_model, session.ddpg_critic_model)
    if session.ddpg_reply_memory is not None:
        ddpg.replay_memory.set(session.ddpg_reply_memory)
    knob_data = ddpg.choose_action(metric_data)
    LOG.info('recommended knob: %s', knob_data)

    knob_bounds = np.vstack(DataUtil.get_knob_bounds(knob_labels, session))
    knob_data = MinMaxScaler().fit(knob_bounds).inverse_transform(knob_data.reshape(1, -1))[0]
    conf_map = {k: knob_data[i] for i, k in enumerate(knob_labels)}

    conf_map_res = {}
    conf_map_res['status'] = 'good'
    conf_map_res['result_id'] = result_id
    conf_map_res['recommendation'] = conf_map
    conf_map_res['info'] = 'INFO: ddpg'
    for k in knob_labels:
        LOG.info('%s: %f', k, conf_map[k])
    return conf_map_res
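
# This version reads its hyperparameters from module-level constants rather
# than from the session. The values below are illustrative placeholders, not
# the project's actual settings; they only show the shape of what the DDPG
# constructor expects.
ACTOR_LEARNING_RATE = 0.02    # step size for actor updates (assumed value)
CRITIC_LEARNING_RATE = 0.001  # step size for critic updates (assumed value)
GAMMA = 0.0                   # discount factor for future rewards (assumed value)
DDPG_BATCH_SIZE = 32          # minibatch size drawn from replay memory (assumed value)
TAU = 0.002                   # soft-update rate for target networks (assumed value)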
def ddpg(env, config, n_loops=100):
    results = []
    x_axis = []
    num_collections = config['num_collections']
    gamma = config['gamma']
    a_lr = config['a_lr']
    c_lr = config['c_lr']
    n_epochs = config['n_epochs']
    ahs = config['a_hidden_sizes']
    chs = config['c_hidden_sizes']
    model_ddpg = DDPG(n_actions=env.knob_dim, n_states=env.metric_dim, gamma=gamma,
                      clr=c_lr, alr=a_lr, shift=0, a_hidden_sizes=ahs, c_hidden_sizes=chs)
    knob_data = np.random.rand(env.knob_dim)
    prev_metric_data = np.zeros(env.metric_dim)

    # Warm up the replay memory with transitions generated by random actions.
    for i in range(num_collections):
        action = np.random.rand(env.knob_dim)
        reward, metric_data = env.simulate(action)
        if i > 0:
            model_ddpg.add_sample(prev_metric_data, prev_knob_data, prev_reward, metric_data)
        prev_metric_data = metric_data
        prev_knob_data = action  # record the action actually taken, not the stale knob_data
        prev_reward = reward

    # Main loop: act with the current policy, store the transition, then train.
    for i in range(n_loops):
        reward, metric_data = env.simulate(knob_data)
        model_ddpg.add_sample(prev_metric_data, prev_knob_data, prev_reward,
                              metric_data)  # next_state is the new observation
        prev_metric_data = metric_data
        prev_knob_data = knob_data
        prev_reward = reward
        for _ in range(n_epochs):
            model_ddpg.update()
        results.append(reward)
        x_axis.append(i + 1)
        LOG.info('loop: %d reward: %f', i, reward[0])
        knob_data = model_ddpg.choose_action(metric_data)
    return np.array(results), np.array(x_axis)
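
# A hypothetical toy environment for exercising ddpg() above. Nothing here is
# part of the real codebase: the quadratic reward and the dimensions are made
# up, and running it still requires the DDPG class and LOG that the
# surrounding module imports.
import numpy as np

class ToyEnv(object):
    knob_dim = 4
    metric_dim = 8

    def simulate(self, knob_data):
        # Reward peaks when every knob sits at 0.5; metrics are a noisy echo
        # of the knob vector, tiled out to metric_dim entries.
        reward = np.array([-np.sum((knob_data - 0.5) ** 2)])
        metric_data = np.concatenate([knob_data, knob_data])
        metric_data = metric_data + 0.01 * np.random.randn(self.metric_dim)
        return reward, metric_data

config = {
    'num_collections': 10,  # random warm-up samples for the replay memory
    'gamma': 0.0, 'a_lr': 0.02, 'c_lr': 0.001, 'n_epochs': 30,
    'a_hidden_sizes': [128, 128, 64], 'c_hidden_sizes': [64, 128, 64],
}
results, x_axis = ddpg(ToyEnv(), config, n_loops=20)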
def configuration_recommendation_ddpg(result_info):  # pylint: disable=invalid-name
    start_ts = time.time()
    LOG.info('Use ddpg to recommend configuration')
    result_id = result_info['newest_result_id']
    result_list = Result.objects.filter(pk=result_id)
    result = result_list.first()
    session = result.session
    params = JSONUtil.loads(session.hyperparameters)
    agg_data = DataUtil.aggregate_data(result_list)
    metric_data, _ = clean_metric_data(agg_data['y_matrix'],
                                       agg_data['y_columnlabels'], session)
    metric_data = metric_data.flatten()
    metric_scalar = MinMaxScaler().fit(metric_data.reshape(1, -1))
    normalized_metric_data = metric_scalar.transform(metric_data.reshape(1, -1))[0]
    cleaned_knob_data = clean_knob_data(agg_data['X_matrix'],
                                        agg_data['X_columnlabels'], session)
    knob_labels = np.array(cleaned_knob_data[1]).flatten()
    knob_num = len(knob_labels)
    metric_num = len(metric_data)

    ddpg = DDPG(n_actions=knob_num, n_states=metric_num,
                a_hidden_sizes=params['DDPG_ACTOR_HIDDEN_SIZES'],
                c_hidden_sizes=params['DDPG_CRITIC_HIDDEN_SIZES'],
                use_default=params['DDPG_USE_DEFAULT'])
    if session.ddpg_actor_model is not None and session.ddpg_critic_model is not None:
        ddpg.set_model(session.ddpg_actor_model, session.ddpg_critic_model)
    if session.ddpg_reply_memory is not None:
        ddpg.replay_memory.set(session.ddpg_reply_memory)
    knob_data = ddpg.choose_action(normalized_metric_data)

    knob_bounds = np.vstack(DataUtil.get_knob_bounds(knob_labels, session))
    knob_data = MinMaxScaler().fit(knob_bounds).inverse_transform(knob_data.reshape(1, -1))[0]
    conf_map = {k: knob_data[i] for i, k in enumerate(knob_labels)}
    conf_map_res = create_and_save_recommendation(recommended_knobs=conf_map, result=result,
                                                  status='good', info='INFO: ddpg')
    save_execution_time(start_ts, 'configuration_recommendation_ddpg', result)
    return conf_map_res
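
# Illustration only: the shape of the hyperparameters this final version
# expects to find in session.hyperparameters (stored as a JSON string). The
# values here are made up; the project's real defaults live in its settings.
import json

example_hyperparameters = json.dumps({
    'DDPG_ACTOR_HIDDEN_SIZES': [128, 128, 64],  # hidden layer widths for the actor
    'DDPG_CRITIC_HIDDEN_SIZES': [64, 128, 64],  # hidden layer widths for the critic
    'DDPG_USE_DEFAULT': False,                  # whether DDPG falls back to built-in defaults
})
params = json.loads(example_hyperparameters)    # mirrors JSONUtil.loads(session.hyperparameters)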