def evaluate(method: storch.method.Method, model: DiscreteVAE, data, optimizer):
    # Compute expected gradient
    optimizer.zero_grad()

    z = generative_story(storch.method.Expect("z"), model, data)
    storch.backward()
    expected_gradient = z.param_grads["probs"]

    # Collect gradient samples
    gradients = []
    for i in range(100):
        optimizer.zero_grad()

        z = generative_story(method, model, data)
        elbo = storch.backward()
        gradients.append(z.param_grads["probs"])

    gradients = storch.gather_samples(gradients, "gradients")
    mean_gradient = storch.reduce_plates(gradients, "gradients")
    bias_gradient = (
        storch.reduce_plates((mean_gradient - expected_gradient) ** 2)
    ).sum()
    print(
        "Training ELBO "
        + str(elbo.item())
        + " Gradient variance "
        + str(storch.variance(gradients, "gradients")._tensor.item())
        + " Gradient bias "
        + str(bias_gradient._tensor.item())
    )
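For context, a minimal sketch of how evaluate might be called follows. DiscreteVAE, generative_story and the data loader are assumed to be defined as in the training example further down this page; the choice of estimator is purely illustrative.

# Sketch (assumed setup): compare a sampling-based estimator against the exact Expect gradient.
model = DiscreteVAE()                                     # model class assumed from this example set
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
method = storch.method.ScoreFunction("z", n_samples=10)   # illustrative estimator
data, _ = next(iter(train_loader))                        # train_loader assumed, e.g. MNIST batches
evaluate(method, model, data, optimizer)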
Example #2
def experiment(method):
    # Draw a single cost sample and return the resulting gradient estimate for the logits eta.
    optim.zero_grad()
    b = Bernoulli(logits=eta.repeat(3))
    x = method(b)
    cost = torch.sum((x - p)**2, -1)
    storch.add_cost(cost, "cost")
    storch.backward()
    return eta.grad.clone()
def experiment(method):
    # Optimization run: take 2000 gradient steps on eta, printing the current logits every 100 steps.
    for i in range(2000):
        optim.zero_grad()
        b = Bernoulli(logits=eta)
        x = method(b)
        cost = torch.sum((x - p)**2, -1)
        storch.add_cost(cost, "cost")
        storch.backward()
        optim.step()
        if i % 100 == 0:
            print(eta)
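Both experiment variants above rely on eta, p and optim being defined outside the snippet. A minimal sketch of that setup, with illustrative values rather than the original ones:

import storch
import torch

p = torch.tensor([0.2, 0.5, 0.8])            # illustrative target probabilities
eta = torch.tensor(0.0, requires_grad=True)  # logit being optimized (repeated to 3 dims in the first variant)
optim = torch.optim.SGD([eta], lr=0.05)

grad_estimate = experiment(storch.method.ScoreFunction("x", n_samples=10))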
Example #4
def estimate_variance(method):
    # Draw 1000 independent gradient estimates of c and report their variance, mean and standard deviation.
    gradient_samples = []
    for i in range(1000):
        f, c = compute_f(method)
        storch.add_cost(f, "f")
        storch.backward()
        gradient_samples.append(c.grad)
    gradients = storch.gather_samples(gradient_samples, "gradients")
    # print(gradients)
    print("variance", storch.variance(gradients, "gradients"))
    print("mean", storch.reduce_plates(gradients, "gradients"))
    print("st dev", torch.sqrt(storch.variance(gradients, "gradients")))

    print(type(gradients))
    print(gradients.shape)
    print(gradients.plates)
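A sketch of how estimate_variance might be driven to compare estimators for the compute_f model shown in a later example; the sample counts are illustrative:

from storch.method import Reparameterization, ScoreFunction

# Reparameterization is typically low-variance; the score function generally needs more samples.
estimate_variance(Reparameterization("e", n_samples=1))
estimate_variance(ScoreFunction("e", n_samples=10))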
def train(method: storch.method.Method, train_loader):
    model = DiscreteVAE()
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
    for epoch in range(5):
        print("Epoch:" + str(epoch + 1))
        for i, (data, _) in enumerate(train_loader):
            # if i % 300 == 0:
            #     evaluate(method, model, data, optimizer)
            optimizer.zero_grad()

            generative_story(method, model, data)
            elbo = storch.backward()
            optimizer.step()
            if i % 300 == 0:
                print("Training ELBO " + str(elbo.item()))
Example #6
import storch
import torch
from torch.distributions import Bernoulli
from storch.method import GumbelSoftmax

torch.manual_seed(0)

p = torch.tensor(0.5, requires_grad=True)

# Attach 10,000 independent Gumbel-Softmax samples of the Bernoulli, each with its own plate and cost.
for i in range(10000):
    sample = GumbelSoftmax(f"sample_{i}")(Bernoulli(p))
    storch.add_cost(sample, f"cost_{i}")

storch.backward()
print("Finished")
Example #7
score_method = storch.method.ScoreFunction("white_noise_1", n_samples=2)
infer_method = storch.method.Infer("white_noise_2", Normal)


def loss(v):
    return torch.nn.MSELoss(reduction="none")(v, theta).mean(dim=-1)


mu = lax_method(Normal(mu_prior, 1))
k = expect(
    Categorical(probs=torch.tensor([[0.1, 0.3, 0.6], [0.1, 0.8, 0.1]],
                                   requires_grad=True)))

agg_v = 0.0
s1 = 1.0
for i in range(2):
    k1, k2 = 0, 0
    if i == 1:
        k1 = k[:, 0]
        k2 = k[:, 1]
    s1 = score_method(Normal(mu + k1, 1))
    aaa = -mu + s1 * k2
    s2 = infer_method(Normal(-mu + s1 * k2, 1))
    # plus = lambda a, b: a + b
    # plus = storch.deterministic(plus)
    agg_v = agg_v + s1 + s2 * mu
    print(isinstance(agg_v, Iterable))
    storch.add_cost(loss(agg_v), "loss")

storch.backward(debug=False, print_costs=True)
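This snippet depends on several names defined outside the excerpt (theta, mu_prior, lax_method, expect, the distribution classes and Iterable). A rough sketch of that assumed setup; the values are illustrative, and a Reparameterization method stands in for lax_method, whose original construction (presumably storch.method.LAX) is not shown:

from collections.abc import Iterable

import storch
import torch
from torch.distributions import Categorical, Normal

theta = torch.tensor([2.0, 1.5])                     # illustrative target for loss()
mu_prior = torch.tensor(0.0)
lax_method = storch.method.Reparameterization("mu")  # stand-in for the original estimator
expect = storch.method.Expect("k")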
Example #8
import storch
import torch
from torch.distributions import Bernoulli, OneHotCategorical

expect = storch.method.Expect("x")
probs = torch.tensor([0.95, 0.01, 0.01, 0.01, 0.01, 0.01], requires_grad=True)
indices = torch.tensor([1.0, 2.0, 3.0, 4.0, 5.0, 6.0])
b = OneHotCategorical(probs=probs)
z = expect.sample(b)
c = (2.4 * z * indices).sum(-1)
storch.add_cost(c, "no_baseline_cost")

storch.backward()

expect_grad = z.grad["probs"].clone()


def eval(grads):
    # Compare a list of gradient estimates against the exact gradient from Expect:
    # report their mean, the MSE of the samples, and the squared bias of the mean.
    print("----------------------------------")
    grad_samples = storch.gather_samples(grads, "variance")
    mean = storch.reduce_plates(grad_samples, plates=["variance"])
    print("mean grad", mean)
    print("expected grad", expect_grad)
    print("specific_diffs", (mean - expect_grad)**2)
    mse = storch.reduce_plates((grad_samples - expect_grad)**2).sum()
    print("MSE", mse)
    bias = (storch.reduce_plates((mean - expect_grad)**2)).sum()
    print("bias", bias)
    return bias
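A sketch of how eval might be fed: repeatedly estimate the same gradient with a sampling-based estimator and pass the collected per-run gradients to eval. The estimator choice and sample counts are illustrative:

score = storch.method.ScoreFunction("x_s", n_samples=10)

grad_estimates = []
for _ in range(100):
    probs.grad = None                          # clear accumulated leaf gradients between runs
    z_s = score(OneHotCategorical(probs=probs))
    c_s = (2.4 * z_s * indices).sum(-1)
    storch.add_cost(c_s, "score_cost")
    storch.backward()
    grad_estimates.append(z_s.grad["probs"])

eval(grad_estimates)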

Example #9
    d = a + b

    # Sample e from a normal distribution using reparameterization
    normal_distribution = Normal(b + c, 1)
    e = method(normal_distribution)

    f = d * e * e
    return f, c


# e*e follows a noncentral chi-squared distribution https://en.wikipedia.org/wiki/Noncentral_chi-squared_distribution
# exp_f = d * (1 + mu * mu)
repar = Reparameterization("e", n_samples=1)
f, c = compute_f(repar)
storch.add_cost(f, "f")
print(storch.backward())

print("first derivative estimate", c.grad)

f, c = compute_f(repar)
storch.add_cost(f, "f")
print(storch.backward())

print("second derivative estimate", c.grad)


def estimate_variance(method):
    gradient_samples = []
    for i in range(1000):
        f, c = compute_f(method)
        storch.add_cost(f, "f")
Example #10
def train(epoch, model, train_loader, device, optimizer, args, writer):
    model.train()
    train_loss = 0
    for batch_idx, (data, _) in enumerate(train_loader):
        data = data.to(device)
        optimizer.zero_grad()
        storch.reset()

        # Denote the minibatch dimension as being independent
        data = storch.denote_independent(data.view(-1, 784), 0, "data")
        recon_batch, KLD, z = model(data)
        storch.add_cost(loss_function(recon_batch, data), "reconstruction")
        cost = backward()
        train_loss += cost.item()

        optimizer.step()

        cond_log = batch_idx % args.log_interval == 0

        if cond_log:
            step = 100.0 * batch_idx / len(train_loader)
            global_step = 100 * (epoch - 1) + step

            # Variance of expect method is 0 by definition.
            variances = {}
            if args.method != "expect" and args.variance_samples > 1:
                _consider_param = "probs"
                if args.latents < 3:
                    old_method = model.sampling_method
                    model.sampling_method = Expect("z")
                    optimizer.zero_grad()
                    recon_batch, _, z = model(data)
                    storch.add_cost(loss_function(recon_batch, data),
                                    "reconstruction")
                    backward()
                    expect_grad = storch.reduce_plates(
                        z.grad[_consider_param]).detach_tensor()

                    optimizer.zero_grad()
                    model.sampling_method = old_method
                grads = {n: [] for n in z.grad}

                for i in range(args.variance_samples):
                    optimizer.zero_grad()
                    recon_batch, _, z = model(data)
                    storch.add_cost(loss_function(recon_batch, data),
                                    "reconstruction")
                    backward()

                    for param_name, grad in z.grad.items():
                        # Make sure to reduce the data dimension and detach, for memory reasons.
                        grads[param_name].append(
                            storch.reduce_plates(grad).detach_tensor())

                variances = {}
                for param_name, gradz in grads.items():
                    # Create a new independent dimension for the different gradient samples
                    grad_samples = storch.gather_samples(gradz, "variance")
                    # Compute the variance over this independent dimension
                    variances[param_name] = storch.variance(
                        grad_samples, "variance")._tensor
                    if param_name == _consider_param and args.latents < 3:
                        mean = storch.reduce_plates(grad_samples, "variance")
                        mse = storch.reduce_plates(
                            (grad_samples - expect_grad)**2).sum()
                        bias = (storch.reduce_plates(
                            (mean - expect_grad)**2)).sum()
                        print("mse", mse._tensor.item())
                        # Should approach 0 when increasing variance_samples for unbiased estimators.
                        print("bias", bias._tensor.item())
                        writer.add_scalar("train/probs_bias", bias._tensor,
                                          global_step)
                        writer.add_scalar("train/probs_mse", mse._tensor,
                                          global_step)

            print(
                "Train Epoch: {} [{}/{} ({:.0f}%)]\tCost: {:.6f}\t Logits var {}"
                .format(
                    epoch,
                    batch_idx * len(data),
                    len(train_loader.dataset),
                    step,
                    cost.item(),
                    variances,
                ))
            writer.add_scalar("train/ELBO", cost, global_step)
            for param_name, var in variances.items():
                writer.add_scalar("train/variance/" + param_name, var,
                                  global_step)
    avg_train_loss = train_loss / (batch_idx + 1)
    print("====> Epoch: {} Average loss: {:.4f}".format(epoch, avg_train_loss))
    return avg_train_loss
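The train function above calls a loss_function that is not shown. For a binary-likelihood VAE over 784-pixel MNIST images it is presumably a per-example binary cross-entropy, roughly:

import torch

def loss_function(recon_x, x):
    # Per-pixel binary cross-entropy, summed over the 784 pixels of each example.
    return torch.nn.functional.binary_cross_entropy(
        recon_x, x, reduction="none").sum(-1)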