Example #1
def ddpg(env, config, n_loops=100):
    results = []
    x_axis = []
    num_collections = config['num_collections']
    gamma = config['gamma']
    a_lr = config['a_lr']
    c_lr = config['c_lr']
    n_epochs = config['n_epochs']
    ahs = config['a_hidden_sizes']
    chs = config['c_hidden_sizes']
    model_ddpg = DDPG(n_actions=env.knob_dim,
                      n_states=env.metric_dim,
                      gamma=gamma,
                      clr=c_lr,
                      alr=a_lr,
                      shift=0,
                      a_hidden_sizes=ahs,
                      c_hidden_sizes=chs)
    knob_data = np.random.rand(env.knob_dim)
    prev_metric_data = np.zeros(env.metric_dim)

    # Warm-up: seed the replay buffer with transitions from random knob settings.
    for i in range(num_collections):
        action = np.random.rand(env.knob_dim)
        reward, metric_data = env.simulate(action)
        if i > 0:
            model_ddpg.add_sample(prev_metric_data, prev_knob_data,
                                  prev_reward, metric_data)
        prev_metric_data = metric_data
        prev_knob_data = action  # pair the sample with the random action actually applied
        prev_reward = reward

    # Tuning loop: apply the agent's knobs, store the sample, train, then choose the next knobs.
    for i in range(n_loops):
        reward, metric_data = env.simulate(knob_data)
        model_ddpg.add_sample(prev_metric_data, prev_knob_data, prev_reward,
                              prev_metric_data)
        prev_metric_data = metric_data
        prev_knob_data = knob_data
        prev_reward = reward
        for _ in range(n_epochs):
            model_ddpg.update()
        results.append(reward)
        x_axis.append(i + 1)
        LOG.info('loop: %d reward: %f', i, reward[0])
        knob_data = model_ddpg.choose_action(metric_data)
    return np.array(results), np.array(x_axis)
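A minimal driver sketch for the simulation loop above. `TuningEnvironment` is a hypothetical stand-in for whatever environment the test harness provides; the only interface assumed is what the function actually uses (`knob_dim`, `metric_dim`, and `simulate(knobs) -> (reward, metric_data)`), and the config values are illustrative rather than defaults from the source.

config = {
    'num_collections': 50,             # random warm-up samples gathered before training
    'gamma': 0.0,                      # discount factor for the DDPG agent
    'a_lr': 0.02,                      # actor learning rate
    'c_lr': 0.001,                     # critic learning rate
    'n_epochs': 30,                    # gradient updates per tuning iteration
    'a_hidden_sizes': [128, 128, 64],  # actor hidden-layer sizes (illustrative)
    'c_hidden_sizes': [64, 128, 64],   # critic hidden-layer sizes (illustrative)
}

env = TuningEnvironment()  # hypothetical: exposes knob_dim, metric_dim, simulate(knobs) -> (reward, metrics)
rewards, steps = ddpg(env, config, n_loops=100)
LOG.info('best reward seen: %f', rewards.max())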
Example #2
def train_ddpg(result_id):
    LOG.info('Add training data to ddpg and train ddpg')
    result = Result.objects.get(pk=result_id)
    session = result.session
    session_results = Result.objects.filter(
        session=session, creation_time__lt=result.creation_time)
    result_info = {}
    result_info['newest_result_id'] = result_id
    if len(session_results) == 0:
        LOG.info('No previous result. Abort.')
        return result_info

    # Extract data from result
    result = Result.objects.filter(pk=result_id)
    base_result_id = session_results[0].pk
    base_result = Result.objects.filter(pk=base_result_id)

    agg_data = DataUtil.aggregate_data(result)
    metric_data = agg_data['y_matrix'].flatten()
    base_metric_data = (
        DataUtil.aggregate_data(base_result))['y_matrix'].flatten()
    metric_scalar = MinMaxScaler().fit(metric_data.reshape(1, -1))
    normalized_metric_data = metric_scalar.transform(metric_data.reshape(
        1, -1))[0]

    # Clean knob data
    cleaned_knob_data = clean_knob_data(agg_data['X_matrix'],
                                        agg_data['X_columnlabels'], session)
    knob_data = np.array(cleaned_knob_data[0])
    knob_labels = np.array(cleaned_knob_data[1])
    knob_bounds = np.vstack(
        DataUtil.get_knob_bounds(knob_labels.flatten(), session))
    knob_data = MinMaxScaler().fit(knob_bounds).transform(knob_data)[0]
    knob_num = len(knob_data)
    metric_num = len(metric_data)
    LOG.info('knob_num: %d, metric_num: %d', knob_num, metric_num)

    # Filter ys by current target objective metric
    result = Result.objects.get(pk=result_id)
    target_objective = result.session.target_objective
    target_obj_idx = [
        i for i, n in enumerate(agg_data['y_columnlabels'])
        if n == target_objective
    ]
    if len(target_obj_idx) == 0:
        raise Exception(('Could not find target objective in metrics '
                         '(target_obj={})').format(target_objective))
    elif len(target_obj_idx) > 1:
        raise Exception(
            ('Found {} instances of target objective in '
             'metrics (target_obj={})').format(len(target_obj_idx),
                                               target_objective))
    objective = metric_data[target_obj_idx]
    base_objective = base_metric_data[target_obj_idx]
    metric_meta = db.target_objectives.get_metric_metadata(
        result.session.dbms.pk, result.session.target_objective)

    # Calculate the reward
    objective = objective / base_objective
    if metric_meta[target_objective].improvement == '(less is better)':
        reward = -objective
    else:
        reward = objective
    LOG.info('reward: %f', reward)

    # Update ddpg
    ddpg = DDPG(n_actions=knob_num,
                n_states=metric_num,
                alr=ACTOR_LEARNING_RATE,
                clr=CRITIC_LEARNING_RATE,
                gamma=0,
                batch_size=DDPG_BATCH_SIZE)
    if session.ddpg_actor_model and session.ddpg_critic_model:
        ddpg.set_model(session.ddpg_actor_model, session.ddpg_critic_model)
    if session.ddpg_reply_memory:
        ddpg.replay_memory.set(session.ddpg_reply_memory)
    ddpg.add_sample(normalized_metric_data, knob_data, reward,
                    normalized_metric_data)
    for _ in range(25):
        ddpg.update()
    session.ddpg_actor_model, session.ddpg_critic_model = ddpg.get_model()
    session.ddpg_reply_memory = ddpg.replay_memory.get()
    session.save()
    return result_info
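For context, the actor/critic weights and replay memory saved on the session above are what a later recommendation step would reload to pick the next configuration. The sketch below is an assumption about how that step could look, reusing only calls already shown in these examples (`set_model`, `replay_memory.set`, `choose_action`) plus scikit-learn's `MinMaxScaler.inverse_transform` to map the actor's [0, 1] output back onto the knob bounds; `recommend_knobs_sketch` is a hypothetical helper, not a function from the source.

def recommend_knobs_sketch(session, normalized_metric_data, knob_bounds, knob_num, metric_num):
    # Rebuild the agent with the same dimensions and hyperparameters used for training.
    ddpg = DDPG(n_actions=knob_num, n_states=metric_num,
                alr=ACTOR_LEARNING_RATE, clr=CRITIC_LEARNING_RATE,
                gamma=0, batch_size=DDPG_BATCH_SIZE)
    if session.ddpg_actor_model and session.ddpg_critic_model:
        ddpg.set_model(session.ddpg_actor_model, session.ddpg_critic_model)
    if session.ddpg_reply_memory:
        ddpg.replay_memory.set(session.ddpg_reply_memory)
    # The actor outputs knob values scaled to [0, 1]; invert the MinMax scaling
    # fit on the knob bounds to recover concrete knob settings.
    scaled_knobs = ddpg.choose_action(normalized_metric_data)
    return MinMaxScaler().fit(knob_bounds).inverse_transform(scaled_knobs.reshape(1, -1))[0]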
Example #3
def train_ddpg(result_id):
    LOG.info('Add training data to ddpg and train ddpg')
    result = Result.objects.get(pk=result_id)
    session = result.session
    params = JSONUtil.loads(session.hyperparameters)
    session_results = Result.objects.filter(
        session=session, creation_time__lt=result.creation_time)
    result_info = {}
    result_info['newest_result_id'] = result_id

    # Extract data from result and previous results
    result = Result.objects.filter(pk=result_id)
    if len(session_results) == 0:
        base_result_id = result_id
        prev_result_id = result_id
    else:
        base_result_id = session_results[0].pk
        prev_result_id = session_results[len(session_results) - 1].pk
    base_result = Result.objects.filter(pk=base_result_id)
    prev_result = Result.objects.filter(pk=prev_result_id)

    agg_data = DataUtil.aggregate_data(result)
    base_metric_data = (
        DataUtil.aggregate_data(base_result))['y_matrix'].flatten()
    prev_metric_data = (
        DataUtil.aggregate_data(prev_result))['y_matrix'].flatten()

    result = Result.objects.get(pk=result_id)
    target_objective = result.session.target_objective
    prev_obj_idx = [
        i for i, n in enumerate(agg_data['y_columnlabels'])
        if n == target_objective
    ]

    # Clean metric data
    metric_data, metric_labels = clean_metric_data(agg_data['y_matrix'],
                                                   agg_data['y_columnlabels'],
                                                   session)
    metric_data = metric_data.flatten()
    metric_scalar = MinMaxScaler().fit(metric_data.reshape(1, -1))
    normalized_metric_data = metric_scalar.transform(metric_data.reshape(
        1, -1))[0]

    # Clean knob data
    cleaned_knob_data = clean_knob_data(agg_data['X_matrix'],
                                        agg_data['X_columnlabels'], session)
    knob_data = np.array(cleaned_knob_data[0])
    knob_labels = np.array(cleaned_knob_data[1])
    knob_bounds = np.vstack(
        DataUtil.get_knob_bounds(knob_labels.flatten(), session))
    knob_data = MinMaxScaler().fit(knob_bounds).transform(knob_data)[0]
    knob_num = len(knob_data)
    metric_num = len(metric_data)
    LOG.info('knob_num: %d, metric_num: %d', knob_num, metric_num)

    # Filter ys by current target objective metric
    target_obj_idx = [
        i for i, n in enumerate(metric_labels) if n == target_objective
    ]
    if len(target_obj_idx) == 0:
        raise Exception(('Could not find target objective in metrics '
                         '(target_obj={})').format(target_objective))
    elif len(target_obj_idx) > 1:
        raise Exception(
            ('Found {} instances of target objective in '
             'metrics (target_obj={})').format(len(target_obj_idx),
                                               target_objective))
    objective = metric_data[target_obj_idx]
    base_objective = base_metric_data[prev_obj_idx]
    prev_objective = prev_metric_data[prev_obj_idx]
    metric_meta = db.target_objectives.get_metric_metadata(
        result.session.dbms.pk, result.session.target_objective)

    # Calculate the reward
    if params['DDPG_SIMPLE_REWARD']:
        objective = objective / base_objective
        if metric_meta[target_objective].improvement == '(less is better)':
            reward = -objective
        else:
            reward = objective
    else:
        if metric_meta[target_objective].improvement == '(less is better)':
            if objective - base_objective <= 0:  # positive reward
                reward = (np.square((2 * base_objective - objective) / base_objective) - 1)\
                    * abs(2 * prev_objective - objective) / prev_objective
            else:  # negative reward
                reward = -(np.square(objective / base_objective) -
                           1) * objective / prev_objective
        else:
            if objective - base_objective > 0:  # positive reward
                reward = (np.square(objective / base_objective) -
                          1) * objective / prev_objective
            else:  # negative reward
                reward = -(np.square((2 * base_objective - objective) / base_objective) - 1)\
                    * abs(2 * prev_objective - objective) / prev_objective
    LOG.info('reward: %f', reward)

    # Update ddpg
    ddpg = DDPG(n_actions=knob_num,
                n_states=metric_num,
                alr=params['DDPG_ACTOR_LEARNING_RATE'],
                clr=params['DDPG_CRITIC_LEARNING_RATE'],
                gamma=params['DDPG_GAMMA'],
                batch_size=params['DDPG_BATCH_SIZE'],
                a_hidden_sizes=params['DDPG_ACTOR_HIDDEN_SIZES'],
                c_hidden_sizes=params['DDPG_CRITIC_HIDDEN_SIZES'],
                use_default=params['DDPG_USE_DEFAULT'])
    if session.ddpg_actor_model and session.ddpg_critic_model:
        ddpg.set_model(session.ddpg_actor_model, session.ddpg_critic_model)
    if session.ddpg_reply_memory:
        ddpg.replay_memory.set(session.ddpg_reply_memory)
    ddpg.add_sample(normalized_metric_data, knob_data, reward,
                    normalized_metric_data)
    for _ in range(params['DDPG_UPDATE_EPOCHS']):
        ddpg.update()
    session.ddpg_actor_model, session.ddpg_critic_model = ddpg.get_model()
    session.ddpg_reply_memory = ddpg.replay_memory.get()
    session.save()
    return result_info
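The non-simple branch above rewards improvement quadratically relative to the session's first (baseline) result and scales it by the change from the previous result. Below is a standalone restatement of that formula as a plain function, for readability only; `less_is_better` is an assumed flag standing in for the `metric_meta[target_objective].improvement` check, and scalars replace the length-1 arrays used above.

def shaped_reward(objective, base_objective, prev_objective, less_is_better):
    # Same branches as the DDPG_SIMPLE_REWARD=False case above.
    obj, base, prev = float(objective), float(base_objective), float(prev_objective)
    if less_is_better:
        if obj <= base:  # improved on the baseline -> positive reward
            return (((2 * base - obj) / base) ** 2 - 1) * abs(2 * prev - obj) / prev
        return -((obj / base) ** 2 - 1) * obj / prev  # regressed -> negative reward
    if obj > base:  # improved on the baseline -> positive reward
        return ((obj / base) ** 2 - 1) * obj / prev
    return -(((2 * base - obj) / base) ** 2 - 1) * abs(2 * prev - obj) / prev

For example, with a latency objective (less is better) of 80 ms against a baseline and previous value of 100 ms, the reward is ((120/100)**2 - 1) * |200 - 80|/100 = 0.44 * 1.2 = 0.528.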