Example #1
0
    def q_update(self):
        """Run one DQN optimization step on a sampled minibatch.

        Samples a transition batch from the replay buffer, builds TD
        targets with the (frozen) target network, fits the policy
        network with a Huber loss, and returns the mean absolute TD
        error of the batch.
        """
        self.optimizer.zero_grad()

        batch = self.replay_buf.sample(self.batch_size)
        states, actions, rewards, next_states, dones = (
            t.to(self.device) for t in batch)

        # TD target: r for terminal transitions, otherwise
        # r + gamma * max_a Q_target(s', a).
        with torch.no_grad():
            next_q = self.target_net(next_states).max(1)[0]
            targets = torch.where(
                dones, rewards,
                rewards + self.discount_factor * next_q)

        # Q(s, a) of the actions that were actually taken.
        q_taken = self.policy_net(states).gather(
            1, actions.unsqueeze(-1)).squeeze(-1)

        loss = huber(targets, q_taken, 2.)
        loss.backward()
        self.optimizer.step()

        # Mean absolute TD error — handy for monitoring / prioritization.
        return (targets - q_taken).abs().mean()
        # NOTE(review): orphaned fragment — the enclosing function (and the
        # `errors` / `tmp_errors` dicts it builds) is not visible in this file.
        # compute average errors
    for name in errors:
        for coeff in errors[name]:
            # Presumably averages per-trial errors into errors[name][coeff];
            # TODO confirm tmp_errors shares the same nested key structure.
            errors[name][coeff].append(np.mean(tmp_errors[name][coeff]))

    return errors


if __name__ == "__main__":

    # Degrees-of-freedom grid for the Student-t experiments: 0.5, 1.0, ..., 10.0.
    nus = np.arange(20) / 2 + 0.5

    # Experiment 1: Huber norms with several tuning constants, plus plain OLS.
    norms = {'huber-{}'.format(t): huber(t) for t in range(1, 5)}
    norms['ols'] = LeastSquares()

    experiment1 = norm_comparison(norms, nus)

    # Render and save the first comparison figure.
    display_results(experiment1, norms, nus, filename='experiment_1.png')

    # Experiment 2: compare the ECME algorithm with the M-estimates.
    # NOTE(review): helper is named `emce_comparison` while the comment says
    # ECME — possible typo in the helper's name; left as-is to match its
    # definition elsewhere.
    experiment2 = emce_comparison(nus)

    # Render and save the second comparison figure.
    display_results(experiment2, norms, nus, filename='experiment_2.png')
 def gravity_penalty(self, D: Tensor, huber_k: float = 1.0):
     """Huber-robustified distance penalty on the layer weights.

     `D` is an N x P distance matrix matching `self.fx.weight`; each
     weight is scaled by its distance, pushed through the Huber norm,
     and the result is averaged over all entries.
     """
     weighted = self.fx.weight * D  # elementwise, N x P
     return huber(weighted, huber_k).mean()
 def shrink_loss(self, huber_k: float = 1.0):
     """Mean Huber shrinkage penalty applied directly to the raw weights."""
     return huber(self.fx.weight, huber_k).mean()
# %% Training schedule
epochs = 7_500       # total number of gradient steps
print_every = 100    # console-logging interval (epochs)
val_every = 1000     # validation interval (epochs)

# %%

# %% Options taken from the experiment config
clamp_weights = config.clamp_weights

# Module-scope placeholders populated by the training loop below.
Yhat = None  # latest model prediction
effects = [None for _ in hyads_months]  # one slot per month
# Main optimization loop: robust (Huber) data-fit loss plus total-variation,
# shrinkage, gravity, and (optional) log-barrier regularizers.
for e in range(epochs):
    Yhat = mod(X, C)
    # Robust reconstruction term on the residuals.
    ll_loss = huber(Y - Yhat, k=1.0).mean()
    # Total-variation smoothness across the given graph edges.
    tv_loss = mod.tv_loss(src, tgt, edgew)
    shrink_loss = mod.shrink_loss(huber_k)
    gravity_loss = mod.gravity_penalty(D, huber_k)

    if clamp_weights:
        barr_loss = mod.log_barrier()  # optional for pos weights
    else:
        barr_loss = 0.0

    # Weighted sum of all terms; the barrier gets a tiny fixed coefficient.
    loss = (ll_loss + tv * tv_loss + shrink * shrink_loss +
            gravity * gravity_loss + 1e-6 * barr_loss)
    opt.zero_grad()
    loss.backward()
    opt.step()
    # NOTE(review): this `if` is truncated — its body is missing from the
    # visible source; the lines that follow belong to a different fragment.
    if clamp_weights:
    # NOTE(review): orphaned tail of a function (its `def` line is not visible
    # here); `y`, `X`, `norm`, and `verbose` presumably come from its signature.
    # estimate params
    # Fit a robust linear model (statsmodels RLM) using the supplied M-norm.
    model = sm.RLM(y, X, M=norm)
    results = model.fit()
    estimate = results.params  # fitted regression coefficients

    if verbose:
        print(results.summary2())

    return estimate


if __name__ == '__main__':

    data = load_data()

    # Collect one (filename, a, b) record per dataset and build the frame
    # once at the end: DataFrame.append was deprecated in pandas 1.4 and
    # removed in pandas 2.0, and per-row appends were quadratic anyway.
    rows = []

    for filename, df in sorted(data.items()):

        print(filename)
        print("Compute M-estimate using Huber-norm reweighting.")
        # Robust slope/intercept estimate via Huber-norm reweighting.
        b, a = m_estimate(df, norm=huber(), verbose=True)

        rows.append({'filename': filename, 'a': a, 'b': b})

    results = pd.DataFrame(rows, columns=['filename', 'a', 'b'])

    # save as .csv
    results.to_csv(OUTPUT_FILENAME)