Example #1
File: test_enum.py Project: lewisKit/pyro
def test_svi_step_guide_uses_grad(enumerate1):
    data = torch.tensor([0., 1., 3.])

    @poutine.broadcast
    def model():
        scale = pyro.param("scale")
        loc = pyro.sample("loc", dist.Normal(0., 10.))
        with pyro.iarange("data", len(data)):
            pyro.sample("obs", dist.Normal(loc, scale), obs=data)
        pyro.sample("b", dist.Bernoulli(0.5))

    @config_enumerate(default=enumerate1)
    def guide():
        p = pyro.param("p", torch.tensor(0.5), constraint=constraints.unit_interval)
        scale = pyro.param("scale", torch.tensor(1.0), constraint=constraints.positive)
        var = pyro.param("var", torch.tensor(1.0), constraint=constraints.positive)

        x = torch.tensor(0., requires_grad=True)
        prior = dist.Normal(0., 10.).log_prob(x)
        likelihood = dist.Normal(x, scale).log_prob(data).sum()
        loss = -(prior + likelihood)
        g = grad(loss, [x], create_graph=True)[0]
        H = grad(g, [x], create_graph=True)[0]
        loc = x.detach() - g / H  # newton step
        pyro.sample("loc", dist.Normal(loc, var))
        pyro.sample("b", dist.Bernoulli(p))

    elbo = TraceEnum_ELBO(max_iarange_nesting=1,
                          strict_enumeration_warning=any([enumerate1]))
    inference = SVI(model, guide, pyro.optim.Adam({}), elbo)
    inference.step()
Example #2
File: test_jit.py Project: lewisKit/pyro
def test_dirichlet_bernoulli(Elbo, vectorized):
    pyro.clear_param_store()
    data = torch.tensor([1.0] * 6 + [0.0] * 4)

    def model1(data):
        concentration0 = torch.tensor([10.0, 10.0])
        f = pyro.sample("latent_fairness", dist.Dirichlet(concentration0))[1]
        for i in pyro.irange("irange", len(data)):
            pyro.sample("obs_{}".format(i), dist.Bernoulli(f), obs=data[i])

    def model2(data):
        concentration0 = torch.tensor([10.0, 10.0])
        f = pyro.sample("latent_fairness", dist.Dirichlet(concentration0))[1]
        pyro.sample("obs", dist.Bernoulli(f).expand_by(data.shape).independent(1),
                    obs=data)

    model = model2 if vectorized else model1

    def guide(data):
        concentration_q = pyro.param("concentration_q", torch.tensor([15.0, 15.0]),
                                     constraint=constraints.positive)
        pyro.sample("latent_fairness", dist.Dirichlet(concentration_q))

    elbo = Elbo(num_particles=7, strict_enumeration_warning=False)
    optim = Adam({"lr": 0.0005, "betas": (0.90, 0.999)})
    svi = SVI(model, guide, optim, elbo)
    for step in range(40):
        svi.step(data)
Example #3
def assert_ok(model, guide, elbo):
    """
    Assert that inference works without warnings or errors.
    """
    pyro.clear_param_store()
    inference = SVI(model, guide, Adam({"lr": 1e-6}), elbo)
    inference.step()
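A minimal usage sketch for this helper (hypothetical, not from the source): a toy model with only an observed site, paired with an empty guide, should step cleanly under Trace_ELBO.

import torch
import pyro
import pyro.distributions as dist
from pyro.infer import Trace_ELBO

def toy_model():
    loc = pyro.param("loc", torch.tensor(0.0))
    pyro.sample("x", dist.Normal(loc, 1.0), obs=torch.tensor(0.5))

def toy_guide():
    # The model has no unobserved sample sites, so an empty guide is valid.
    pass

assert_ok(toy_model, toy_guide, Trace_ELBO())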
Example #4
File: test_optim.py Project: lewisKit/pyro
def test_dynamic_lr(scheduler, num_steps):
    pyro.clear_param_store()

    def model():
        sample = pyro.sample('latent', Normal(torch.tensor(0.), torch.tensor(0.3)))
        return pyro.sample('obs', Normal(sample, torch.tensor(0.2)), obs=torch.tensor(0.1))

    def guide():
        loc = pyro.param('loc', torch.tensor(0.))
        scale = pyro.param('scale', torch.tensor(0.5))
        pyro.sample('latent', Normal(loc, scale))

    svi = SVI(model, guide, scheduler, loss=TraceGraph_ELBO())
    for epoch in range(2):
        scheduler.set_epoch(epoch)
        for _ in range(num_steps):
            svi.step()
        if epoch == 1:
            loc = pyro.param('loc')
            scale = pyro.param('scale')
            opt = scheduler.optim_objs[loc].optimizer
            assert opt.state_dict()['param_groups'][0]['lr'] == 0.02
            assert opt.state_dict()['param_groups'][0]['initial_lr'] == 0.01
            opt = scheduler.optim_objs[scale].optimizer
            assert opt.state_dict()['param_groups'][0]['lr'] == 0.02
            assert opt.state_dict()['param_groups'][0]['initial_lr'] == 0.01
Example #5
File: test_advi.py Project: lewisKit/pyro
def test_quantiles(auto_class, Elbo):

    def model():
        pyro.sample("x", dist.Normal(0.0, 1.0))
        pyro.sample("y", dist.LogNormal(0.0, 1.0))
        pyro.sample("z", dist.Beta(2.0, 2.0))

    guide = auto_class(model)
    infer = SVI(model, guide, Adam({'lr': 0.01}), Elbo(strict_enumeration_warning=False))
    for _ in range(100):
        infer.step()

    quantiles = guide.quantiles([0.1, 0.5, 0.9])
    median = guide.median()
    for name in ["x", "y", "z"]:
        assert_equal(median[name], quantiles[name][1])
    quantiles = {name: [v.item() for v in value] for name, value in quantiles.items()}

    assert -3.0 < quantiles["x"][0]
    assert quantiles["x"][0] + 1.0 < quantiles["x"][1]
    assert quantiles["x"][1] + 1.0 < quantiles["x"][2]
    assert quantiles["x"][2] < 3.0

    assert 0.01 < quantiles["y"][0]
    assert quantiles["y"][0] * 2.0 < quantiles["y"][1]
    assert quantiles["y"][1] * 2.0 < quantiles["y"][2]
    assert quantiles["y"][2] < 100.0

    assert 0.01 < quantiles["z"][0]
    assert quantiles["z"][0] + 0.1 < quantiles["z"][1]
    assert quantiles["z"][1] + 0.1 < quantiles["z"][2]
    assert quantiles["z"][2] < 0.99
Example #6
def assert_error(model, guide, elbo):
    """
    Assert that inference fails with an error.
    """
    pyro.clear_param_store()
    inference = SVI(model, guide, Adam({"lr": 1e-6}), elbo)
    with pytest.raises((NotImplementedError, UserWarning, KeyError, ValueError, RuntimeError)):
        inference.step()
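A hypothetical counterpart sketch (not from the source): a model whose observation cannot broadcast against its event shape, so the single SVI step raises one of the expected exceptions.

import torch
import pyro
import pyro.distributions as dist
from pyro.infer import Trace_ELBO

def bad_model():
    # obs of shape (2,) cannot broadcast with the event shape (3,),
    # so scoring it raises a ValueError/RuntimeError during the step.
    d = dist.Normal(torch.zeros(3), torch.ones(3)).independent(1)
    pyro.sample("x", d, obs=torch.zeros(2))

def bad_guide():
    pass

assert_error(bad_model, bad_guide, Trace_ELBO())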
Example #7
def test_svi_step_smoke(model, guide, enum_discrete, trace_graph):
    pyro.clear_param_store()
    data = Variable(torch.Tensor([0, 1, 9]))

    optimizer = pyro.optim.Adam({"lr": .001})
    inference = SVI(model, guide, optimizer, loss="ELBO",
                    trace_graph=trace_graph, enum_discrete=enum_discrete)
    with xfail_if_not_implemented():
        inference.step(data)
Example #8
    def do_elbo_test(self, reparameterized, n_steps, lr, prec, beta1,
                     difficulty=1.0, model_permutation=False):
        n_repa_nodes = torch.sum(self.which_nodes_reparam) if not reparameterized \
            else len(self.q_topo_sort)
        logger.info((" - - - DO GAUSSIAN %d-LAYERED PYRAMID ELBO TEST " +
                     "(with a total of %d RVs) [reparameterized=%s; %d/%d; perm=%s] - - -") %
                    (self.N, (2 ** self.N) - 1, reparameterized, n_repa_nodes,
                     len(self.q_topo_sort), model_permutation))
        pyro.clear_param_store()

        # check graph structure is as expected but only for N=2
        if self.N == 2:
            guide_trace = pyro.poutine.trace(self.guide,
                                             graph_type="dense").get_trace(reparameterized=reparameterized,
                                                                           model_permutation=model_permutation,
                                                                           difficulty=difficulty)
            expected_nodes = set(['log_sig_1R', 'kappa_1_1L', '_INPUT', 'constant_term_loc_latent_1R', '_RETURN',
                                  'loc_latent_1R', 'loc_latent_1', 'constant_term_loc_latent_1', 'loc_latent_1L',
                                  'constant_term_loc_latent_1L', 'log_sig_1L', 'kappa_1_1R', 'kappa_1R_1L',
                                  'log_sig_1'])
            expected_edges = set([('loc_latent_1R', 'loc_latent_1'), ('loc_latent_1L', 'loc_latent_1R'),
                                  ('loc_latent_1L', 'loc_latent_1')])
            assert expected_nodes == set(guide_trace.nodes)
            assert expected_edges == set(guide_trace.edges)

        adam = optim.Adam({"lr": lr, "betas": (beta1, 0.999)})
        svi = SVI(self.model, self.guide, adam, loss=TraceGraph_ELBO())

        for step in range(n_steps):
            t0 = time.time()
            svi.step(reparameterized=reparameterized, model_permutation=model_permutation, difficulty=difficulty)

            if step % 5000 == 0 or step == n_steps - 1:
                log_sig_errors = []
                for node in self.target_lambdas:
                    target_log_sig = -0.5 * torch.log(self.target_lambdas[node])
                    log_sig_error = param_mse('log_sig_' + node, target_log_sig)
                    log_sig_errors.append(log_sig_error)
                max_log_sig_error = np.max(log_sig_errors)
                min_log_sig_error = np.min(log_sig_errors)
                mean_log_sig_error = np.mean(log_sig_errors)
                leftmost_node = self.q_topo_sort[0]
                leftmost_constant_error = param_mse('constant_term_' + leftmost_node,
                                                    self.target_leftmost_constant)
                almost_leftmost_constant_error = param_mse('constant_term_' + leftmost_node[:-1] + 'R',
                                                           self.target_almost_leftmost_constant)

                logger.debug("[mean function constant errors (partial)]   %.4f  %.4f" %
                             (leftmost_constant_error, almost_leftmost_constant_error))
                logger.debug("[min/mean/max log(scale) errors]   %.4f  %.4f   %.4f" %
                             (min_log_sig_error, mean_log_sig_error, max_log_sig_error))
                logger.debug("[step time = %.3f;  N = %d;  step = %d]\n" % (time.time() - t0, self.N, step))

        assert_equal(0.0, max_log_sig_error, prec=prec)
        assert_equal(0.0, leftmost_constant_error, prec=prec)
        assert_equal(0.0, almost_leftmost_constant_error, prec=prec)
Example #9
File: test_enum.py Project: lewisKit/pyro
def test_svi_step_smoke(model, guide, enumerate1):
    pyro.clear_param_store()
    data = torch.tensor([0.0, 1.0, 9.0])

    guide = config_enumerate(guide, default=enumerate1)
    optimizer = pyro.optim.Adam({"lr": .001})
    elbo = TraceEnum_ELBO(max_iarange_nesting=1,
                          strict_enumeration_warning=any([enumerate1]))
    inference = SVI(model, guide, optimizer, loss=elbo)
    inference.step(data)
Example #10
def assert_warning(model, guide, elbo):
    """
    Assert that inference works but with a warning.
    """
    pyro.clear_param_store()
    inference = SVI(model, guide, Adam({"lr": 1e-6}), elbo)
    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter("always")
        inference.step()
        assert len(w), 'No warnings were raised'
        for warning in w:
            logger.info(warning)
Example #11
File: test_jit.py Project: lewisKit/pyro
def test_svi(Elbo, num_particles):
    pyro.clear_param_store()
    data = torch.arange(10)

    def model(data):
        loc = pyro.param("loc", torch.tensor(0.0))
        scale = pyro.param("scale", torch.tensor(1.0), constraint=constraints.positive)
        pyro.sample("x", dist.Normal(loc, scale).expand_by(data.shape).independent(1), obs=data)

    def guide(data):
        pass

    elbo = Elbo(num_particles=num_particles, strict_enumeration_warning=False)
    inference = SVI(model, guide, Adam({"lr": 1e-6}), elbo)
    for i in range(100):
        inference.step(data)
Example #12
File: tree_data.py Project: lewisKit/pyro
def main(args):
    pyro.set_rng_seed(0)
    pyro.enable_validation()

    optim = Adam({"lr": 0.1})
    inference = SVI(model, guide, optim, loss=Trace_ELBO())

    # Data is an arbitrary json-like structure with tensors at leaves.
    one = torch.tensor(1.0)
    data = {
        "foo": one,
        "bar": [0 * one, 1 * one, 2 * one],
        "baz": {
            "noun": {
                "concrete": 4 * one,
                "abstract": 6 * one,
            },
            "verb": 2 * one,
        },
    }

    print('Step\tLoss')
    loss = 0.0
    for step in range(args.num_epochs):
        loss += inference.step(data)
        if step and step % 10 == 0:
            print('{}\t{:0.5g}'.format(step, loss))
            loss = 0.0

    print('Parameters:')
    for name in sorted(pyro.get_param_store().get_all_param_names()):
        print('{} = {}'.format(name, pyro.param(name).detach().cpu().numpy()))
Example #13
    def do_elbo_test(self, reparameterized, n_steps, lr, prec, difficulty=1.0):
        n_repa_nodes = torch.sum(self.which_nodes_reparam) if not reparameterized else self.N
        logger.info(" - - - - - DO GAUSSIAN %d-CHAIN ELBO TEST  [reparameterized = %s; %d/%d] - - - - - " %
                    (self.N, reparameterized, n_repa_nodes, self.N))
        if self.N < 0:
            def array_to_string(y):
                return str(map(lambda x: "%.3f" % x.detach().cpu().numpy()[0], y))

            logger.debug("lambdas: " + array_to_string(self.lambdas))
            logger.debug("target_mus: " + array_to_string(self.target_mus[1:]))
            logger.debug("target_kappas: "******"lambda_posts: " + array_to_string(self.lambda_posts[1:]))
            logger.debug("lambda_tilde_posts: " + array_to_string(self.lambda_tilde_posts))
            pyro.clear_param_store()

        adam = optim.Adam({"lr": lr, "betas": (0.95, 0.999)})
        elbo = TraceGraph_ELBO()
        loss_and_grads = elbo.loss_and_grads
        # loss_and_grads = elbo.jit_loss_and_grads  # This fails.
        svi = SVI(self.model, self.guide, adam, loss=elbo.loss, loss_and_grads=loss_and_grads)

        for step in range(n_steps):
            t0 = time.time()
            svi.step(reparameterized=reparameterized, difficulty=difficulty)

            if step % 5000 == 0 or step == n_steps - 1:
                kappa_errors, log_sig_errors, loc_errors = [], [], []
                for k in range(1, self.N + 1):
                    if k != self.N:
                        kappa_error = param_mse("kappa_q_%d" % k, self.target_kappas[k])
                        kappa_errors.append(kappa_error)

                    loc_errors.append(param_mse("loc_q_%d" % k, self.target_mus[k]))
                    log_sig_error = param_mse("log_sig_q_%d" % k, -0.5 * torch.log(self.lambda_posts[k]))
                    log_sig_errors.append(log_sig_error)

                max_errors = (np.max(loc_errors), np.max(log_sig_errors), np.max(kappa_errors))
                min_errors = (np.min(loc_errors), np.min(log_sig_errors), np.min(kappa_errors))
                mean_errors = (np.mean(loc_errors), np.mean(log_sig_errors), np.mean(kappa_errors))
                logger.debug("[max errors]   (loc, log_scale, kappa) = (%.4f, %.4f, %.4f)" % max_errors)
                logger.debug("[min errors]   (loc, log_scale, kappa) = (%.4f, %.4f, %.4f)" % min_errors)
                logger.debug("[mean errors]  (loc, log_scale, kappa) = (%.4f, %.4f, %.4f)" % mean_errors)
                logger.debug("[step time = %.3f;  N = %d;  step = %d]\n" % (time.time() - t0, self.N, step))

        assert_equal(0.0, max_errors[0], prec=prec)
        assert_equal(0.0, max_errors[1], prec=prec)
        assert_equal(0.0, max_errors[2], prec=prec)
Example #14
def test_inference_deepGP():
    gp1 = GPRegression(X, None, kernel, name="GPR1")
    Z, _ = gp1.model()
    gp2 = VariationalSparseGP(Z, y2D, Matern32(input_dim=3), Z.clone(),
                              likelihood, name="GPR2")

    def model():
        Z, _ = gp1.model()
        gp2.set_data(Z, y2D)
        gp2.model()

    def guide():
        gp1.guide()
        gp2.guide()

    svi = SVI(model, guide, optim.Adam({}), Trace_ELBO())
    svi.step()
Example #15
File: test_advi.py Project: lewisKit/pyro
def test_irange_smoke(auto_class, Elbo):

    def model():
        x = pyro.sample("x", dist.Normal(0, 1))
        assert x.shape == ()

        for i in pyro.irange("irange", 3):
            y = pyro.sample("y_{}".format(i), dist.Normal(0, 1).expand_by([2, 1 + i, 2]).independent(3))
            assert y.shape == (2, 1 + i, 2)

        z = pyro.sample("z", dist.Normal(0, 1).expand_by([2]).independent(1))
        assert z.shape == (2,)

        pyro.sample("obs", dist.Bernoulli(0.1), obs=torch.tensor(0))

    guide = auto_class(model)
    infer = SVI(model, guide, Adam({"lr": 1e-6}), Elbo(strict_enumeration_warning=False))
    infer.step()
Example #16
File: test_advi.py Project: lewisKit/pyro
def test_median(auto_class, Elbo):

    def model():
        pyro.sample("x", dist.Normal(0.0, 1.0))
        pyro.sample("y", dist.LogNormal(0.0, 1.0))
        pyro.sample("z", dist.Beta(2.0, 2.0))

    guide = auto_class(model)
    infer = SVI(model, guide, Adam({'lr': 0.05}), Elbo(strict_enumeration_warning=False))
    for _ in range(100):
        infer.step()

    median = guide.median()
    assert_equal(median["x"], torch.tensor(0.0), prec=0.1)
    if auto_class is AutoDelta:
        assert_equal(median["y"], torch.tensor(-1.0).exp(), prec=0.1)
    else:
        assert_equal(median["y"], torch.tensor(1.0), prec=0.1)
    assert_equal(median["z"], torch.tensor(0.5), prec=0.1)
Example #17
    def test_elbo_with_transformed_distribution(self):
        if self.verbose:
            print(" - - - - - DO LOGNORMAL-NORMAL ELBO TEST [uses TransformedDistribution] - - - - - ")
        pyro.clear_param_store()

        def model():
            mu_latent = pyro.sample("mu_latent", dist.normal,
                                    self.mu0, torch.pow(self.tau0, -0.5))
            bijector = AffineExp(torch.pow(self.tau, -0.5), mu_latent)
            x_dist = TransformedDistribution(dist.normal, bijector)
            pyro.observe("obs0", x_dist, self.data[0], ng_zeros(1), ng_ones(1))
            pyro.observe("obs1", x_dist, self.data[1], ng_zeros(1), ng_ones(1))
            return mu_latent

        def guide():
            mu_q_log = pyro.param(
                "mu_q_log",
                Variable(
                    self.log_mu_n.data +
                    0.17,
                    requires_grad=True))
            tau_q_log = pyro.param("tau_q_log", Variable(self.log_tau_n.data - 0.143,
                                                         requires_grad=True))
            mu_q, tau_q = torch.exp(mu_q_log), torch.exp(tau_q_log)
            pyro.sample("mu_latent", dist.normal, mu_q, torch.pow(tau_q, -0.5))

        adam = optim.Adam({"lr": 0.001, "betas": (0.95, 0.999)})
        svi = SVI(model, guide, adam, loss="ELBO", trace_graph=True)

        for k in range(7000):
            svi.step()

            mu_error = param_abs_error("mu_q_log", self.log_mu_n)
            tau_error = param_abs_error("tau_q_log", self.log_tau_n)

            if k % 500 == 0 and self.verbose:
                print("mu_error, tau_error = %.4f, %.4f" % (mu_error, tau_error))

        self.assertEqual(0.0, mu_error, prec=0.05)
        self.assertEqual(0.0, tau_error, prec=0.05)
Example #18
    def test_elbo_nonreparameterized(self):
        if self.verbose:
            print(" - - - - - DO POISSON-GAMMA ELBO TEST - - - - - ")
        pyro.clear_param_store()

        def model():
            lambda_latent = pyro.sample("lambda_latent", dist.gamma, self.alpha0, self.beta0)
            for i, x in enumerate(self.data):
                pyro.observe("obs_{}".format(i), dist.poisson, x, lambda_latent)
            return lambda_latent

        def guide():
            alpha_q_log = pyro.param(
                "alpha_q_log",
                Variable(
                    self.log_alpha_n.data +
                    0.17,
                    requires_grad=True))
            beta_q_log = pyro.param(
                "beta_q_log",
                Variable(
                    self.log_beta_n.data -
                    0.143,
                    requires_grad=True))
            alpha_q, beta_q = torch.exp(alpha_q_log), torch.exp(beta_q_log)
            pyro.sample("lambda_latent", dist.gamma, alpha_q, beta_q,
                        baseline=dict(use_decaying_avg_baseline=True))

        adam = optim.Adam({"lr": .0007, "betas": (0.95, 0.999)})
        svi = SVI(model, guide, adam, loss="ELBO", trace_graph=True)

        for k in range(7000):
            svi.step()
            alpha_error = param_abs_error("alpha_q_log", self.log_alpha_n)
            beta_error = param_abs_error("beta_q_log", self.log_beta_n)
            if k % 500 == 0 and self.verbose:
                print("alpha_q_log_error, beta_q_log_error: %.4f, %.4f" % (alpha_error, beta_error))

        self.assertEqual(0.0, alpha_error, prec=0.08)
        self.assertEqual(0.0, beta_error, prec=0.08)
Example #19
    def do_elbo_test(self, reparameterized, n_steps):
        if self.verbose:
            print(" - - - - - DO NORMALNORMAL ELBO TEST  [reparameterized = %s] - - - - - " % reparameterized)
        pyro.clear_param_store()

        def model():
            mu_latent = pyro.sample(
                    "mu_latent",
                    dist.Normal(self.mu0, torch.pow(self.lam0, -0.5), reparameterized=reparameterized))
            for i, x in enumerate(self.data):
                pyro.observe("obs_%d" % i, dist.normal, x, mu_latent,
                             torch.pow(self.lam, -0.5))
            return mu_latent

        def guide():
            mu_q = pyro.param("mu_q", Variable(self.analytic_mu_n.data + 0.334 * torch.ones(2),
                                               requires_grad=True))
            log_sig_q = pyro.param("log_sig_q", Variable(
                                   self.analytic_log_sig_n.data - 0.29 * torch.ones(2),
                                   requires_grad=True))
            sig_q = torch.exp(log_sig_q)
            mu_latent = pyro.sample("mu_latent",
                                    dist.Normal(mu_q, sig_q, reparameterized=reparameterized),
                                    baseline=dict(use_decaying_avg_baseline=True))
            return mu_latent

        adam = optim.Adam({"lr": .0015, "betas": (0.97, 0.999)})
        svi = SVI(model, guide, adam, loss="ELBO", trace_graph=True)

        for k in range(n_steps):
            svi.step()

            mu_error = param_mse("mu_q", self.analytic_mu_n)
            log_sig_error = param_mse("log_sig_q", self.analytic_log_sig_n)
            if k % 250 == 0 and self.verbose:
                print("mu error, log(sigma) error:  %.4f, %.4f" % (mu_error, log_sig_error))

        self.assertEqual(0.0, mu_error, prec=0.03)
        self.assertEqual(0.0, log_sig_error, prec=0.03)
Example #20
    def do_elbo_test(self, reparameterized, n_steps, beta1, lr):
        if self.verbose:
            print(" - - - - - DO LOGNORMAL-NORMAL ELBO TEST [repa = %s] - - - - - " % reparameterized)
        pyro.clear_param_store()
        pt_guide = LogNormalNormalGuide(self.log_mu_n.data + 0.17,
                                        self.log_tau_n.data - 0.143)

        def model():
            mu_latent = pyro.sample("mu_latent", dist.normal,
                                    self.mu0, torch.pow(self.tau0, -0.5))
            sigma = torch.pow(self.tau, -0.5)
            pyro.observe("obs0", dist.lognormal, self.data[0], mu_latent, sigma)
            pyro.observe("obs1", dist.lognormal, self.data[1], mu_latent, sigma)
            return mu_latent

        def guide():
            pyro.module("mymodule", pt_guide)
            mu_q, tau_q = torch.exp(pt_guide.mu_q_log), torch.exp(pt_guide.tau_q_log)
            sigma = torch.pow(tau_q, -0.5)
            pyro.sample("mu_latent",
                        dist.Normal(mu_q, sigma, reparameterized=reparameterized),
                        baseline=dict(use_decaying_avg_baseline=True))

        adam = optim.Adam({"lr": lr, "betas": (beta1, 0.999)})
        svi = SVI(model, guide, adam, loss="ELBO", trace_graph=True)

        for k in range(n_steps):
            svi.step()

            mu_error = param_abs_error("mymodule$$$mu_q_log", self.log_mu_n)
            tau_error = param_abs_error("mymodule$$$tau_q_log", self.log_tau_n)
            if k % 500 == 0 and self.verbose:
                print("mu_error, tau_error = %.4f, %.4f" % (mu_error, tau_error))

        self.assertEqual(0.0, mu_error, prec=0.05)
        self.assertEqual(0.0, tau_error, prec=0.05)
Example #21
File: mixture.py Project: lewisKit/pyro
def main(args):
    pyro.set_rng_seed(0)
    pyro.enable_validation()

    optim = Adam({"lr": 0.1})
    inference = SVI(model, guide, optim, loss=Trace_ELBO())
    data = torch.tensor([0.0, 1.0, 2.0, 20.0, 30.0, 40.0])
    k = 2

    print('Step\tLoss')
    loss = 0.0
    for step in range(args.num_epochs):
        if step and step % 10 == 0:
            print('{}\t{:0.5g}'.format(step, loss))
            loss = 0.0
        loss += inference.step(data, k)

    print('Parameters:')
    for name in sorted(pyro.get_param_store().get_all_param_names()):
        print('{} = {}'.format(name, pyro.param(name).detach().cpu().numpy()))
Example #22
    def do_test_per_param_optim(self, fixed_param, free_param):
        pyro.clear_param_store()

        def model():
            prior_dist = Normal(self.mu0, torch.pow(self.lam0, -0.5))
            mu_latent = pyro.sample("mu_latent", prior_dist)
            x_dist = Normal(mu_latent, torch.pow(self.lam, -0.5))
            pyro.observe("obs", x_dist, self.data)
            return mu_latent

        def guide():
            mu_q = pyro.param(
                "mu_q",
                Variable(
                    torch.zeros(1),
                    requires_grad=True))
            log_sig_q = pyro.param(
                "log_sig_q", Variable(
                    torch.zeros(1), requires_grad=True))
            sig_q = torch.exp(log_sig_q)
            pyro.sample("mu_latent", Normal(mu_q, sig_q))

        def optim_params(module_name, param_name, tags):
            if param_name == fixed_param:
                return {'lr': 0.00}
            elif param_name == free_param:
                return {'lr': 0.01}

        adam = optim.Adam(optim_params)
        adam2 = optim.Adam(optim_params)
        svi = SVI(model, guide, adam, loss="ELBO", trace_graph=True)
        svi2 = SVI(model, guide, adam2, loss="ELBO", trace_graph=True)

        svi.step()
        adam_initial_step_count = list(adam.get_state()['mu_q']['state'].items())[0][1]['step']
        adam.save('adam.unittest.save')
        svi.step()
        adam_final_step_count = list(adam.get_state()['mu_q']['state'].items())[0][1]['step']
        adam2.load('adam.unittest.save')
        svi2.step()
        adam2_step_count_after_load_and_step = list(adam2.get_state()['mu_q']['state'].items())[0][1]['step']

        assert adam_initial_step_count == 1
        assert adam_final_step_count == 2
        assert adam2_step_count_after_load_and_step == 2

        free_param_unchanged = torch.equal(pyro.param(free_param).data, torch.zeros(1))
        fixed_param_unchanged = torch.equal(pyro.param(fixed_param).data, torch.zeros(1))
        assert fixed_param_unchanged and not free_param_unchanged
Example #23
def main(args):
    # load data
    print('loading training data...')
    if not os.path.exists('faces_training.csv'):
        wget.download('https://d2fefpcigoriu7.cloudfront.net/datasets/faces_training.csv', 'faces_training.csv')
    data = torch.tensor(np.loadtxt('faces_training.csv', delimiter=',')).float()

    sparse_gamma_def = SparseGammaDEF()
    opt = optim.AdagradRMSProp({"eta": 4.5, "t": 0.1})
    svi = SVI(sparse_gamma_def.model, sparse_gamma_def.guide, opt, loss=Trace_ELBO())

    print('\nbeginning training...')

    # the training loop
    for k in range(args.num_epochs):
        loss = svi.step(data)
        sparse_gamma_def.clip_params()  # we clip params after each gradient step

        if k % 20 == 0 and k > 0:
            print("[epoch %04d] training elbo: %.4g" % (k, -loss))
Example #24
def main(args):
    pyro.set_rng_seed(0)
    pyro.clear_param_store()
    K = 2

    data = torch.tensor([0.0, 1.0, 2.0, 20.0, 30.0, 40.0])
    optim = pyro.optim.Adam({'lr': 0.1})
    inference = SVI(model,
                    config_enumerate(guide),
                    optim,
                    loss=TraceEnum_ELBO(max_plate_nesting=1))

    print('Step\tLoss')
    loss = 0.0
    for step in range(args.num_epochs):
        if step and step % 10 == 0:
            print('{}\t{:0.5g}'.format(step, loss))
            loss = 0.0
        loss += inference.step(K, data)

    print('Parameters:')
    for name, value in sorted(pyro.get_param_store().items()):
        print('{} = {}'.format(name, value.detach().cpu().numpy()))
Example #25
File: model.py Project: lewisKit/pyro
    def optimize(self, optimizer=None, loss=None, num_steps=1000):
        """
        A convenient method to optimize parameters for the Gaussian Process model
        using :class:`~pyro.infer.svi.SVI`.

        :param PyroOptim optimizer: A Pyro optimizer.
        :param ELBO loss: A Pyro loss instance.
        :param int num_steps: Number of steps to run SVI.
        :returns: a list of losses during the training procedure
        :rtype: list
        """
        if optimizer is None:
            optimizer = Adam({})
        if not isinstance(optimizer, PyroOptim):
            raise ValueError("Optimizer should be an instance of "
                             "pyro.optim.PyroOptim class.")
        if loss is None:
            loss = Trace_ELBO()
        svi = SVI(self.model, self.guide, optimizer, loss=loss)
        losses = []
        for i in range(num_steps):
            losses.append(svi.step())
        return losses
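A hypothetical usage sketch for this convenience method (the data, kernel, and hyperparameters below are illustrative only, assuming the contrib.gp API of the same Pyro version):

import torch
import pyro.contrib.gp as gp
from pyro.infer import Trace_ELBO
from pyro.optim import Adam

X = torch.linspace(0.0, 5.0, 20)
y = torch.sin(X) + 0.1 * torch.randn(20)

kernel = gp.kernels.RBF(input_dim=1)
gpr = gp.models.GPRegression(X, y, kernel)

# optimize() runs SVI internally and returns the per-step losses.
losses = gpr.optimize(optimizer=Adam({"lr": 0.01}), loss=Trace_ELBO(), num_steps=500)
print(losses[-1])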
Example #26
    def fit(self, X, Y, verbose=False):
        optim = Adam({
            "lr": self.weight_decay,
            "weight_decay": self.weight_decay
        })
        svi = SVI(self.model, self.guide, optim, loss=Trace_ELBO())

        data = np.concatenate((X, Y), axis=1)
        data = Variable(torch.from_numpy(data).type(torch.FloatTensor))

        loss_log = []
        for epoch in range(self.epochs):

            loss = svi.step(data)
            loss_log.append(loss)

            if verbose:
                if epoch % 100 == 0:
                    print('Epoch {} loss: {}'.format(epoch + 1, loss))

            # Save best model
            if loss <= min(loss_log):
                self.best_model = self.guide
Example #27
def test_auto_dirichlet(auto_class, Elbo):
    num_steps = 2000
    prior = torch.tensor([0.5, 1.0, 1.5, 3.0])
    data = torch.tensor([0] * 4 + [1] * 2 + [2] * 5).long()
    posterior = torch.tensor([4.5, 3.0, 6.5, 3.0])

    def model(data):
        p = pyro.sample("p", dist.Dirichlet(prior))
        with pyro.plate("data_plate"):
            pyro.sample("data", dist.Categorical(p).expand_by(data.shape), obs=data)

    guide = auto_class(model)
    svi = SVI(model, guide, optim.Adam({"lr": .003}), loss=Elbo())

    for _ in range(num_steps):
        loss = svi.step(data)
        assert np.isfinite(loss), loss

    expected_mean = posterior / posterior.sum()
    actual_mean = biject_to(constraints.simplex)(guide.loc)
    assert_equal(actual_mean, expected_mean, prec=0.2, msg=''.join([
        '\nexpected {}'.format(expected_mean.detach().cpu().numpy()),
        '\n  actual {}'.format(actual_mean.detach().cpu().numpy())]))
Example #28
    def fit(self, x: torch.Tensor) -> MixtureModel:
        def init_loc_fn(site):
            K = self.num_components
            if site["name"] == "weights":
                return torch.ones(K) / K
            if site["name"] == "scales":
                return torch.tensor([[(x.var() / 2).sqrt()] * 2] * K)
            if site["name"] == "locs":
                return x[torch.multinomial(torch.ones(x.shape[0]) / x.shape[0], K), :]
            raise ValueError(site["name"])

        self.guide = AutoDelta(poutine.block(self.model, expose=['weights', 'locs', 'scales']),
                               init_loc_fn=init_loc_fn)

        optim = pyro.optim.Adam({'lr': 0.1, 'betas': [0.8, 0.99]})
        loss = TraceEnum_ELBO(max_plate_nesting=1)

        svi = SVI(self.model, self.guide, optim, loss=loss)

        for i in range(self.optim_steps):
            elbo = svi.step(x)
            self.history["loss"].append(elbo)
        return self
Example #29
def main(_argv):
    transition_alphas = torch.tensor([[10., 90.],
                                      [90., 10.]])
    emission_alphas = torch.tensor([[[30., 20., 5.]],
                                    [[5., 10., 100.]]])
    lengths = torch.randint(10, 30, (10000,))
    trace = poutine.trace(model).get_trace(transition_alphas, emission_alphas, lengths)
    obs_sequences = [site['value'] for name, site in
                     trace.nodes.items() if name.startswith("element_")]
    obs_sequences = torch.stack(obs_sequences, dim=-2)
    guide = AutoDelta(poutine.block(model, hide_fn=lambda site: site['name'].startswith('state')),
                      init_loc_fn=init_to_sample)
    svi = SVI(model, guide, Adam(dict(lr=0.1)), JitTraceEnum_ELBO())
    total = 1000
    with tqdm.trange(total) as t:
        for i in t:
            loss = svi.step(0.5 * torch.ones((2, 2), dtype=torch.float),
                            0.3 * torch.ones((2, 1, 3), dtype=torch.float),
                            lengths, obs_sequences)
            t.set_description_str(f"SVI ({i}/{total}): {loss}")
    median = guide.median()
    print("Transition probs: ", median['transition_probs'].detach().numpy())
    print("Emission probs: ", median['emission_probs'].squeeze().detach().numpy())
Example #30
File: SPIRE.py Project: ianmbus/XID_plus
def all_bands(priors,
              lr=0.005,
              n_steps=1000,
              n_samples=1000,
              verbose=True,
              sub=1):
    from pyro.infer import Predictive

    pyro.clear_param_store()

    guide = AutoMultivariateNormal(spire_model, init_loc_fn=init_to_mean)

    svi = SVI(spire_model, guide, optim.Adam({"lr": lr}), loss=Trace_ELBO())

    loss_history = []
    for i in range(n_steps):
        loss = svi.step(priors, sub=sub)
        if (i % 100 == 0) and verbose:
            print('ELBO loss: {}'.format(loss))
        loss_history.append(loss)
    print('ELBO loss: {}'.format(loss))
    predictive = Predictive(spire_model, guide=guide, num_samples=n_samples)
    samples = {
        k: v.squeeze(-1).detach().cpu().numpy()
        for k, v in predictive(priors).items() if k != "obs"
    }
    f_low_lim = torch.tensor([p.prior_flux_lower for p in priors],
                             dtype=torch.float)
    f_up_lim = torch.tensor([p.prior_flux_upper for p in priors],
                            dtype=torch.float)
    f_vec_multi = (f_up_lim -
                   f_low_lim) * samples['src_f'][..., :, :] + f_low_lim
    samples['src_f'] = f_vec_multi.squeeze(-3).numpy()
    samples['sigma_conf'] = samples['sigma_conf'].squeeze(-1).squeeze(-2)
    samples['bkg'] = samples['bkg'].squeeze(-1).squeeze(-2)

    return {'loss_history': loss_history, 'samples': samples}
Example #31
    def fit(self, T, W, X, Y, S=1e-7):
        data = self.get_data_dict(T, W, X, Y, S)

        lr = self.lr
        svi = SVI(self.model, self.guide, Adam({'lr': lr}), loss=Trace_ELBO())

        lc = []
        lt = []
        pyro.set_rng_seed(0)
        pyro.clear_param_store()
        if self.notebook: from tqdm.notebook import tqdm
        else: from tqdm import tqdm

        for i in tqdm(range(self.n_iter)):
            elbo = svi.step(data)
            lt.append(elbo)

            if i and not i % (self.n_iter // self.n_stp):
                lr *= .1
                svi = SVI(self.model,
                          self.guide,
                          Adam({'lr': lr}),
                          loss=Trace_ELBO())

            if not i % (self.n_iter // 20):
                with torch.no_grad():
                    lc.append(sum(lt) / len(lt))
                    lt = []
                    pars = self.guide()
                    distr = self.get_distr(data, pars)
                    llk = distr.log_prob(data['Y']).mean().item()
                    r2 = np.corrcoef(distr.mean.view(-1),
                                     data['Y'].view(-1))[0, 1]**2
                    print(
                        '%d\t\tELBO: %.2E - LLK: %.2E - r2: %.3f - lr: %.2E' %
                        (i, elbo, llk, r2, lr))
                    self.lrn_crvs.append((i, elbo, llk, r2, lr))
Example #32
    def _train_full_data(self, x_data, obs2sample, n_epochs=20000, lr=0.002):

        idx = np.arange(x_data.shape[0]).astype("int64")

        device = torch.device("cuda")
        idx = torch.tensor(idx).to(device)
        x_data = torch.tensor(x_data).to(device)
        obs2sample = torch.tensor(obs2sample).to(device)

        self.to(device)

        pyro.clear_param_store()
        self.guide(x_data, idx, obs2sample)

        svi = SVI(
            self.model,
            self.guide,
            optim.ClippedAdam({
                "lr": lr,
                "clip_norm": 200
            }),
            loss=Trace_ELBO(),
        )

        iter_iterator = tqdm(range(n_epochs))
        hist = []
        for it in iter_iterator:

            loss = svi.step(x_data, idx, obs2sample)
            iter_iterator.set_description("Epoch " + "{:d}".format(it) +
                                          ", -ELBO: " + "{:.4e}".format(loss))
            hist.append(loss)

            if it % 500 == 0:
                torch.cuda.empty_cache()

        self.hist = hist
Example #33
def test_ss_mle(dim, dist):
    base_dist = dist[0](*(torch.tensor(param).expand((dim, ))
                          for param in dist[1])).to_event(1)

    skewness_tar = _skewness(base_dist.event_shape)
    data = SineSkewed(base_dist, skewness_tar).sample((1000, ))

    def model(data, batch_shape):
        skews = []
        for i in range(dim):
            skews.append(
                pyro.param(
                    f"skew{i}",
                    0.5 * torch.ones(batch_shape),
                    constraint=constraints.interval(-1, 1),
                ))

        skewness = torch.stack(skews, dim=-1)
        with pyro.plate("data", data.size(-len(data.size()))):
            pyro.sample("obs", SineSkewed(base_dist, skewness), obs=data)

    def guide(data, batch_shape):
        pass

    pyro.clear_param_store()
    adam = Adam({"lr": 0.1})
    svi = SVI(model, guide, adam, loss=Trace_ELBO())

    losses = []
    steps = 80
    for step in range(steps):
        losses.append(svi.step(data, base_dist.batch_shape))

    act_skewness = torch.stack(
        [v for k, v in pyro.get_param_store().items() if "skew" in k], dim=-1)
    assert_equal(act_skewness, skewness_tar, 1e-1)
Example #34
def main(args):
    logging.info('Generating data')
    pyro.set_rng_seed(0)
    pyro.clear_param_store()

    # We can generate synthetic data directly by calling the model.
    true_topic_weights, true_topic_words, data = model(args=args)

    # We'll train using SVI.
    logging.info('-' * 40)
    logging.info('Training on {} documents'.format(args.num_docs))
    predictor = make_predictor(args)
    guide = functools.partial(parametrized_guide, predictor)
    Elbo = JitTraceEnum_ELBO if args.jit else TraceEnum_ELBO
    elbo = Elbo(max_plate_nesting=2)
    optim = ClippedAdam({'lr': args.learning_rate})
    svi = SVI(model, guide, optim, elbo)
    logging.info('Step\tLoss')
    for step in range(args.num_steps):
        loss = svi.step(data, args=args, batch_size=args.batch_size)
        if step % 10 == 0:
            logging.info('{: >5d}\t{}'.format(step, loss))
    loss = elbo.loss(model, guide, data, args=args)
    logging.info('final loss = {}'.format(loss))
Example #35
def test_non_nested_plating_sum():
    """Example from https://github.com/pyro-ppl/pyro/issues/2361"""

    # Generative model: data = x @ weights + eps
    def model(data, weights):
        loc = torch.tensor(1.0)
        scale = torch.tensor(0.1)

        # Sample latents (shares no dimensions with data)
        with pyro.plate("x_plate", weights.shape[0]):
            x = pyro.sample("x", pyro.distributions.Normal(loc, scale))

        # Combine with weights and sample
        with pyro.plate("data_plate_1", data.shape[-1]):
            with pyro.plate("data_plate_2", data.shape[-2]):
                pyro.sample("data",
                            pyro.distributions.Normal(x @ weights, scale),
                            obs=data)

    def guide(data, weights):
        loc = pyro.param("x_loc", torch.tensor(0.5))
        scale = torch.tensor(0.1)

        with pyro.plate("x_plate", weights.shape[0]):
            pyro.sample("x", pyro.distributions.Normal(loc, scale))

    data = torch.randn([5, 3])
    weights = torch.randn([2, 3])
    adam = optim.Adam({"lr": 0.01})
    loss_fn = RenyiELBO(num_particles=30, vectorize_particles=True)
    svi = SVI(model, guide, adam, loss_fn)

    for step in range(1):
        loss = svi.step(data, weights)
        if step % 20 == 0:
            logger.info("step {} loss = {:0.4g}".format(step, loss))
Example #36
    def infer_posterior(self,
                        iter_steps=10000,
                        num_particles=100,
                        optim_kwargs={'lr': .01}):
        """Perform SVI over free model parameters.
        """

        clear_param_store()

        svi = SVI(model=self.model,
                  guide=self.guide,
                  optim=Adam(optim_kwargs),
                  loss=TraceEnum_ELBO(num_particles=num_particles,
                                      vectorize_particles=True))

        loss = []
        pbar = tqdm(range(iter_steps), position=0)
        for step in pbar:
            loss.append(svi.step())
            pbar.set_description("Mean ELBO %6.2f" % tensor(loss[-20:]).mean())
            if np.isnan(loss[-1]):
                break

        self.loss = loss
Example #37
def test_sequential_plating_sum():
    """Example from https://github.com/pyro-ppl/pyro/issues/2361"""
    def model(data):
        x = pyro.sample("x", dist.Bernoulli(torch.tensor(0.5)))
        for i in pyro.plate("data_plate", len(data)):
            pyro.sample(
                "data_{:d}".format(i),
                dist.Normal(x, scale=torch.tensor(0.1)),
                obs=data[i],
            )

    def guide(data):
        p = pyro.param("p", torch.tensor(0.5))
        pyro.sample("x", pyro.distributions.Bernoulli(p))

    data = torch.cat([torch.randn([5]), 1.0 + torch.randn([5])])
    adam = optim.Adam({"lr": 0.01})
    loss_fn = RenyiELBO(alpha=0, num_particles=30, vectorize_particles=True)
    svi = SVI(model, guide, adam, loss_fn)

    for step in range(1):
        loss = svi.step(data)
        if step % 20 == 0:
            logger.info("step {} loss = {:0.4g}".format(step, loss))
Example #38
def test_reparam_stable():
    data = dist.Poisson(torch.randn(8).exp()).sample()

    @poutine.reparam(config={"dz": LatentStableReparam(), "y": LatentStableReparam()})
    def model():
        stability = pyro.sample("stability", dist.Uniform(1., 2.))
        trans_skew = pyro.sample("trans_skew", dist.Uniform(-1., 1.))
        obs_skew = pyro.sample("obs_skew", dist.Uniform(-1., 1.))
        scale = pyro.sample("scale", dist.Gamma(3, 1))

        # We use separate plates because the .cumsum() op breaks independence.
        with pyro.plate("time1", len(data)):
            dz = pyro.sample("dz", dist.Stable(stability, trans_skew))
        z = dz.cumsum(-1)
        with pyro.plate("time2", len(data)):
            y = pyro.sample("y", dist.Stable(stability, obs_skew, scale, z))
            pyro.sample("x", dist.Poisson(y.abs()), obs=data)

    guide = AutoDelta(model)
    svi = SVI(model, guide, optim.Adam({"lr": 0.01}), Trace_ELBO())
    for step in range(100):
        loss = svi.step()
        if step % 20 == 0:
            logger.info("step {} loss = {:0.4g}".format(step, loss))
Example #39
def main(args):
    if args.cuda:
        torch.set_default_tensor_type('torch.cuda.FloatTensor')

    logging.info('Loading data')
    data = poly.load_data(poly.JSB_CHORALES)

    logging.info('-' * 40)
    model = models[args.model]
    logging.info('Training {} on {} sequences'.format(
        model.__name__, len(data['train']['sequences'])))
    sequences = data['train']['sequences']
    lengths = data['train']['sequence_lengths']

    # find all the notes that are present at least once in the training set
    present_notes = ((sequences == 1).sum(0).sum(0) > 0)
    # remove notes that are never played (we remove 37/88 notes)
    sequences = sequences[..., present_notes]

    if args.truncate:
        lengths.clamp_(max=args.truncate)
        sequences = sequences[:, :args.truncate]
    num_observations = float(lengths.sum())
    pyro.set_rng_seed(0)
    pyro.clear_param_store()
    pyro.enable_validation(True)

    # We'll train using MAP Baum-Welch, i.e. MAP estimation while marginalizing
    # out the hidden state x. This is accomplished via an automatic guide that
    # learns point estimates of all of our conditional probability tables,
    # named probs_*.
    guide = AutoDelta(
        poutine.block(model,
                      expose_fn=lambda msg: msg["name"].startswith("probs_")))

    # To help debug our tensor shapes, let's print the shape of each site's
    # distribution, value, and log_prob tensor. Note this information is
    # automatically printed on most errors inside SVI.
    if args.print_shapes:
        first_available_dim = -2 if model is model_0 else -3
        guide_trace = poutine.trace(guide).get_trace(
            sequences, lengths, args=args, batch_size=args.batch_size)
        model_trace = poutine.trace(
            poutine.replay(poutine.enum(model, first_available_dim),
                           guide_trace)).get_trace(sequences,
                                                   lengths,
                                                   args=args,
                                                   batch_size=args.batch_size)
        logging.info(model_trace.format_shapes())

    # Enumeration requires a TraceEnum elbo and declaring the max_plate_nesting.
    # All of our models have two plates: "data" and "tones".
    Elbo = JitTraceEnum_ELBO if args.jit else TraceEnum_ELBO
    elbo = Elbo(max_plate_nesting=1 if model is model_0 else 2)
    optim = Adam({'lr': args.learning_rate})
    svi = SVI(model, guide, optim, elbo)

    # We'll train on small minibatches.
    logging.info('Step\tLoss')
    for step in range(args.num_steps):
        loss = svi.step(sequences,
                        lengths,
                        args=args,
                        batch_size=args.batch_size)
        logging.info('{: >5d}\t{}'.format(step, loss / num_observations))

    # We evaluate on the entire training dataset,
    # excluding the prior term so our results are comparable across models.
    train_loss = elbo.loss(model,
                           guide,
                           sequences,
                           lengths,
                           args,
                           include_prior=False)
    logging.info('training loss = {}'.format(train_loss / num_observations))

    # Finally we evaluate on the test dataset.
    logging.info('-' * 40)
    logging.info('Evaluating on {} test sequences'.format(
        len(data['test']['sequences'])))
    sequences = data['test']['sequences'][..., present_notes]
    lengths = data['test']['sequence_lengths']
    if args.truncate:
        lengths.clamp_(max=args.truncate)
    num_observations = float(lengths.sum())

    # note that since we removed unseen notes above (to make the problem a bit easier and for
    # numerical stability) this test loss may not be directly comparable to numbers
    # reported on this dataset elsewhere.
    test_loss = elbo.loss(model,
                          guide,
                          sequences,
                          lengths,
                          args=args,
                          include_prior=False)
    logging.info('test loss = {}'.format(test_loss / num_observations))

    # We expect models with higher capacity to perform better,
    # but eventually overfit to the training set.
    capacity = sum(
        value.reshape(-1).size(0) for value in pyro.get_param_store().values())
    logging.info('{} capacity = {} parameters'.format(model.__name__,
                                                      capacity))
Example #40
File: hmm.py Project: pyro-ppl/pyro
def main(args):
    if args.cuda:
        torch.set_default_tensor_type("torch.cuda.FloatTensor")

    logging.info("Loading data")
    data = poly.load_data(poly.JSB_CHORALES)

    logging.info("-" * 40)
    model = models[args.model]
    logging.info("Training {} on {} sequences".format(
        model.__name__, len(data["train"]["sequences"])))
    sequences = data["train"]["sequences"]
    lengths = data["train"]["sequence_lengths"]

    # find all the notes that are present at least once in the training set
    present_notes = (sequences == 1).sum(0).sum(0) > 0
    # remove notes that are never played (we remove 37/88 notes)
    sequences = sequences[..., present_notes]

    if args.truncate:
        lengths = lengths.clamp(max=args.truncate)
        sequences = sequences[:, :args.truncate]
    num_observations = float(lengths.sum())
    pyro.set_rng_seed(args.seed)
    pyro.clear_param_store()

    # We'll train using MAP Baum-Welch, i.e. MAP estimation while marginalizing
    # out the hidden state x. This is accomplished via an automatic guide that
    # learns point estimates of all of our conditional probability tables,
    # named probs_*.
    guide = AutoDelta(
        poutine.block(model,
                      expose_fn=lambda msg: msg["name"].startswith("probs_")))

    # To help debug our tensor shapes, let's print the shape of each site's
    # distribution, value, and log_prob tensor. Note this information is
    # automatically printed on most errors inside SVI.
    if args.print_shapes:
        first_available_dim = -2 if model is model_0 else -3
        guide_trace = poutine.trace(guide).get_trace(
            sequences, lengths, args=args, batch_size=args.batch_size)
        model_trace = poutine.trace(
            poutine.replay(poutine.enum(model, first_available_dim),
                           guide_trace)).get_trace(sequences,
                                                   lengths,
                                                   args=args,
                                                   batch_size=args.batch_size)
        logging.info(model_trace.format_shapes())

    # Enumeration requires a TraceEnum elbo and declaring the max_plate_nesting.
    # All of our models have two plates: "data" and "tones".
    optim = Adam({"lr": args.learning_rate})
    if args.tmc:
        if args.jit:
            raise NotImplementedError(
                "jit support not yet added for TraceTMC_ELBO")
        elbo = TraceTMC_ELBO(max_plate_nesting=1 if model is model_0 else 2)
        tmc_model = poutine.infer_config(
            model,
            lambda msg: {
                "num_samples": args.tmc_num_samples,
                "expand": False
            } if msg["infer"].get("enumerate", None) == "parallel" else {},
        )  # noqa: E501
        svi = SVI(tmc_model, guide, optim, elbo)
    else:
        Elbo = JitTraceEnum_ELBO if args.jit else TraceEnum_ELBO
        elbo = Elbo(
            max_plate_nesting=1 if model is model_0 else 2,
            strict_enumeration_warning=(model is not model_7),
            jit_options={"time_compilation": args.time_compilation},
        )
        svi = SVI(model, guide, optim, elbo)

    # We'll train on small minibatches.
    logging.info("Step\tLoss")
    for step in range(args.num_steps):
        loss = svi.step(sequences,
                        lengths,
                        args=args,
                        batch_size=args.batch_size)
        logging.info("{: >5d}\t{}".format(step, loss / num_observations))

    if args.jit and args.time_compilation:
        logging.debug("time to compile: {} s.".format(
            elbo._differentiable_loss.compile_time))

    # We evaluate on the entire training dataset,
    # excluding the prior term so our results are comparable across models.
    train_loss = elbo.loss(model,
                           guide,
                           sequences,
                           lengths,
                           args,
                           include_prior=False)
    logging.info("training loss = {}".format(train_loss / num_observations))

    # Finally we evaluate on the test dataset.
    logging.info("-" * 40)
    logging.info("Evaluating on {} test sequences".format(
        len(data["test"]["sequences"])))
    sequences = data["test"]["sequences"][..., present_notes]
    lengths = data["test"]["sequence_lengths"]
    if args.truncate:
        lengths = lengths.clamp(max=args.truncate)
    num_observations = float(lengths.sum())

    # note that since we removed unseen notes above (to make the problem a bit easier and for
    # numerical stability) this test loss may not be directly comparable to numbers
    # reported on this dataset elsewhere.
    test_loss = elbo.loss(model,
                          guide,
                          sequences,
                          lengths,
                          args=args,
                          include_prior=False)
    logging.info("test loss = {}".format(test_loss / num_observations))

    # We expect models with higher capacity to perform better,
    # but eventually overfit to the training set.
    capacity = sum(
        value.reshape(-1).size(0) for value in pyro.get_param_store().values())
    logging.info("{} capacity = {} parameters".format(model.__name__,
                                                      capacity))
Example #41
# Prepare training data
df = rugged_data[["cont_africa", "rugged", "rgdppc_2000"]]
df = df[np.isfinite(df.rgdppc_2000)]
df["rgdppc_2000"] = np.log(df["rgdppc_2000"])
train = torch.tensor(df.values, dtype=torch.float)

svi = SVI(model,
          guide,
          optim.Adam({"lr": .005}),
          loss=Trace_ELBO(),
          num_samples=1000)
is_cont_africa, ruggedness, log_gdp = train[:, 0], train[:, 1], train[:, 2]
pyro.clear_param_store()
num_iters = 8000 if not smoke_test else 2
for i in range(num_iters):
    elbo = svi.step(is_cont_africa, ruggedness, log_gdp)
    if i % 500 == 0:
        logging.info("Elbo loss: {}".format(elbo))

posterior = svi.run(log_gdp, is_cont_africa, ruggedness)

sites = ["a", "bA", "bR", "bAR", "sigma"]

for site, values in summary(posterior, sites).items():
    print("Site: {}".format(site))
    print(values, "\n")


def wrapped_model(is_cont_africa, ruggedness, log_gdp):
    pyro.sample("prediction", Delta(model(is_cont_africa, ruggedness,
                                          log_gdp)))
Example #42
File: main.py Project: lewisKit/pyro
def main(**kwargs):

    args = argparse.Namespace(**kwargs)

    if 'save' in args:
        if os.path.exists(args.save):
            raise RuntimeError('Output file "{}" already exists.'.format(args.save))

    if args.seed is not None:
        pyro.set_rng_seed(args.seed)

    X, true_counts = load_data()
    X_size = X.size(0)
    if args.cuda:
        X = X.cuda()

    # Build a function to compute z_pres prior probabilities.
    if args.z_pres_prior_raw:
        def base_z_pres_prior_p(t):
            return args.z_pres_prior
    else:
        base_z_pres_prior_p = make_prior(args.z_pres_prior)

    # Wrap with logic to apply any annealing.
    def z_pres_prior_p(opt_step, time_step):
        p = base_z_pres_prior_p(time_step)
        if args.anneal_prior == 'none':
            return p
        else:
            decay = dict(lin=lin_decay, exp=exp_decay)[args.anneal_prior]
            return decay(p, args.anneal_prior_to, args.anneal_prior_begin,
                         args.anneal_prior_duration, opt_step)

    model_arg_keys = ['window_size',
                      'rnn_hidden_size',
                      'decoder_output_bias',
                      'decoder_output_use_sigmoid',
                      'baseline_scalar',
                      'encoder_net',
                      'decoder_net',
                      'predict_net',
                      'embed_net',
                      'bl_predict_net',
                      'non_linearity',
                      'pos_prior_mean',
                      'pos_prior_sd',
                      'scale_prior_mean',
                      'scale_prior_sd']
    model_args = {key: getattr(args, key) for key in model_arg_keys if key in args}
    air = AIR(
        num_steps=args.model_steps,
        x_size=50,
        use_masking=not args.no_masking,
        use_baselines=not args.no_baselines,
        z_what_size=args.encoder_latent_size,
        use_cuda=args.cuda,
        **model_args
    )

    if args.verbose:
        print(air)
        print(args)

    if 'load' in args:
        print('Loading parameters...')
        air.load_state_dict(torch.load(args.load))

    vis = visdom.Visdom(env=args.visdom_env)
    # Viz sample from prior.
    if args.viz:
        z, x = air.prior(5, z_pres_prior_p=partial(z_pres_prior_p, 0))
        vis.images(draw_many(x, tensor_to_objs(latents_to_tensor(z))))

    def per_param_optim_args(module_name, param_name):
        lr = args.baseline_learning_rate if 'bl_' in param_name else args.learning_rate
        return {'lr': lr}

    svi = SVI(air.model, air.guide,
              optim.Adam(per_param_optim_args),
              loss=TraceGraph_ELBO())

    # Do inference.
    t0 = time.time()
    examples_to_viz = X[5:10]

    for i in range(1, args.num_steps + 1):

        loss = svi.step(X, args.batch_size, z_pres_prior_p=partial(z_pres_prior_p, i))

        if args.progress_every > 0 and i % args.progress_every == 0:
            print('i={}, epochs={:.2f}, elapsed={:.2f}, elbo={:.2f}'.format(
                i,
                (i * args.batch_size) / X_size,
                (time.time() - t0) / 3600,
                loss / X_size))

        if args.viz and i % args.viz_every == 0:
            trace = poutine.trace(air.guide).get_trace(examples_to_viz, None)
            z, recons = poutine.replay(air.prior, trace=trace)(examples_to_viz.size(0))
            z_wheres = tensor_to_objs(latents_to_tensor(z))

            # Show data with inferred object positions.
            vis.images(draw_many(examples_to_viz, z_wheres))
            # Show reconstructions of data.
            vis.images(draw_many(recons, z_wheres))

        if args.eval_every > 0 and i % args.eval_every == 0:
            # Measure accuracy on subset of training data.
            acc, counts, error_z, error_ix = count_accuracy(X, true_counts, air, 1000)
            print('i={}, accuracy={}, counts={}'.format(i, acc, counts.numpy().tolist()))
            if args.viz and error_ix.size(0) > 0:
                vis.images(draw_many(X[error_ix[0:5]], tensor_to_objs(error_z[0:5])),
                           opts=dict(caption='errors ({})'.format(i)))

        if 'save' in args and i % args.save_every == 0:
            print('Saving parameters...')
            torch.save(air.state_dict(), args.save)
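The lin_decay and exp_decay schedules used by the annealing wrapper near the top of this example are defined elsewhere in the project. A hedged sketch of a linear decay with the argument order used above (the exact behaviour is an assumption):

def lin_decay(initial, final, begin, duration, t):
    # Anneal linearly from `initial` to `final`, starting at optimization
    # step `begin` and finishing `duration` steps later.
    if t < begin:
        return initial
    frac = min((t - begin) / float(duration), 1.0)
    return initial + frac * (final - initial)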
Example #43
0
        "beta_q",
        torch.tensor(15.0),
        constraint=constraints.positive
    )
    # sample heads_prob from the distribution Beta(alpha_q, beta_q)
    pyro.sample("heads_prob", dist.Beta(alpha_q, beta_q))


# generate data and set up optimizer
data = torch.tensor([1.0] * 20 + [0.0] * 10)
optimizer = Adam({"lr": 0.0005, "betas": (0.90, 0.999)})

# setup the inference algorithm
svi = SVI(model, guide, optimizer, loss=Trace_ELBO(num_particles=10))

n_steps = 5000
# do gradient steps
for step in range(n_steps):
    loss = svi.step(data)
    if step % 100 == 0:
        print(loss)

print(pyro.param("alpha_q").item())
print(pyro.param("beta_q").item())

# true posterior mean
(20 + 10) / (30 + 20)

# estimated posterior mean of Beta(alpha_q, beta_q)
pyro.param("alpha_q") / (pyro.param("alpha_q") + pyro.param("beta_q"))
Example #44
0
    def train(self,
              *,
              raw_expr,
              encoded_expr,
              num_epochs=100,
              batch_size=32,
              learning_rate=1e-3,
              eval_every=10,
              test_proportion=0.05,
              use_l1=False,
              l1_lam=0):

        seed = 2556
        torch.manual_seed(seed)
        pyro.set_rng_seed(seed)

        pyro.clear_param_store()
        logging.info('Validating data ...')

        assert (raw_expr.shape == encoded_expr.shape)
        read_depth = raw_expr.sum(-1)[:, np.newaxis]

        encoded_expr = np.hstack([encoded_expr, np.log(read_depth)])

        read_depth = torch.tensor(read_depth).to(self.device)
        raw_expr = torch.tensor(raw_expr).to(self.device)
        encoded_expr = torch.tensor(encoded_expr).to(self.device)

        logging.info('Initializing model ...')

        self.optimizer = Adam({"lr": 1e-3})
        self.loss = TraceMeanField_ELBO()

        if not use_l1:
            logging.info('No L1 regularization.')
            svi = SVI(self.model, self.guide, self.optimizer, loss=self.loss)

        test_set = np.random.rand(read_depth.shape[0]) < test_proportion
        train_set = ~test_set

        logging.info("Training with {} cells, testing with {}.".format(
            str(train_set.sum()), str(test_set.sum())))
        logging.info('Training ...')

        try:
            for epoch in range(1, num_epochs + 1):
                running_loss = 0.0
                for batch in self.epoch_batch(raw_expr[train_set],
                                              encoded_expr[train_set],
                                              read_depth[train_set],
                                              batch_size=batch_size):
                    if use_l1:
                        loss = self.custom_step(*batch)
                    else:
                        loss = svi.step(*batch)

                    running_loss += loss / batch_size

                logging.info('Done epoch {}/{}. Training loss: {:.3e}'.format(
                    str(epoch), str(num_epochs), running_loss))

                if (epoch % eval_every == 0
                        or epoch == num_epochs) and test_set.sum() > 0:
                    test_logp = self.evaluate(raw_expr[test_set],
                                              encoded_expr[test_set],
                                              read_depth[test_set])
                    logging.info('Test logp: {:.4e}'.format(test_logp))

        except KeyboardInterrupt:
            logging.error('Interrupted training.')

        self.summarize_posterior(raw_expr, encoded_expr, read_depth)

        return self
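The epoch_batch and custom_step methods called above are not part of this excerpt. A hedged sketch of how the mini-batch generator might look, written here as a free function rather than a method (the shuffling behaviour is an assumption):

import torch

def epoch_batch(raw_expr, encoded_expr, read_depth, batch_size=32):
    # Yield aligned mini-batches of the three tensors in a random order.
    n = raw_expr.shape[0]
    perm = torch.randperm(n)
    for start in range(0, n, batch_size):
        idx = perm[start:start + batch_size]
        yield raw_expr[idx], encoded_expr[idx], read_depth[idx]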
Example #45
0
File: main.py Project: Magica-Chen/pyro

# Do inference.
def per_param_optim_args(module_name, param_name, tags):
    lr = 1e-3 if 'baseline' in tags else 1e-4
    return {'lr': lr}


svi = SVI(air.model, air.guide,
          optim.Adam(per_param_optim_args),
          loss='ELBO',
          trace_graph=True)

for i in range(1, args.num_steps + 1):

    loss = svi.step(X, args.batch_size, z_pres_prior_p=partial(z_pres_prior_p, i))

    if args.progress_every > 0 and i % args.progress_every == 0:
        print('i={}, epochs={:.2f}, elapsed={:.2f}, elbo={:.2f}'.format(
            i,
            (i * args.batch_size) / X_size,
            (time.time() - t0) / 3600,
            loss / X_size))

    if args.viz and i % args.viz_every == 0:
        trace = poutine.trace(air.guide).get_trace(examples_to_viz, None)
        z, recons = poutine.replay(air.prior, trace)(examples_to_viz.size(0))
        z_wheres = post_process_latents(z)

        # Show data with inferred object positions.
        vis.images(draw_many(examples_to_viz, z_wheres))
Example #46
0
File: vae.py Project: Magica-Chen/pyro
def main():
    # parse command line arguments
    parser = argparse.ArgumentParser(description="parse args")
    parser.add_argument('-n', '--num-epochs', default=101, type=int, help='number of training epochs')
    parser.add_argument('-tf', '--test-frequency', default=5, type=int, help='how often we evaluate the test set')
    parser.add_argument('-lr', '--learning-rate', default=1.0e-3, type=float, help='learning rate')
    parser.add_argument('-b1', '--beta1', default=0.95, type=float, help='beta1 adam hyperparameter')
    parser.add_argument('--cuda', action='store_true', default=False, help='whether to use cuda')
    parser.add_argument('-visdom', '--visdom_flag', default=False, help='Whether plotting in visdom is desired')
    parser.add_argument('-i-tsne', '--tsne_iter', default=100, type=int, help='epoch when tsne visualization runs')
    args = parser.parse_args()

    # setup MNIST data loaders
    # train_loader, test_loader
    train_loader, test_loader = setup_data_loaders(MNIST, use_cuda=args.cuda, batch_size=256)

    # setup the VAE
    vae = VAE(use_cuda=args.cuda)

    # setup the optimizer
    adam_args = {"lr": args.learning_rate}
    optimizer = Adam(adam_args)

    # setup the inference algorithm
    svi = SVI(vae.model, vae.guide, optimizer, loss="ELBO")

    # setup visdom for visualization
    if args.visdom_flag:
        vis = visdom.Visdom()

    train_elbo = []
    test_elbo = []
    # training loop
    for epoch in range(args.num_epochs):
        # initialize loss accumulator
        epoch_loss = 0.
        # do a training epoch over each mini-batch x returned
        # by the data loader
        for _, (x, _) in enumerate(train_loader):
            # if on GPU put mini-batch into CUDA memory
            if args.cuda:
                x = x.cuda()
            # wrap the mini-batch in a PyTorch Variable
            x = Variable(x)
            # do ELBO gradient and accumulate loss
            epoch_loss += svi.step(x)

        # report training diagnostics
        normalizer_train = len(train_loader.dataset)
        total_epoch_loss_train = epoch_loss / normalizer_train
        train_elbo.append(total_epoch_loss_train)
        print("[epoch %03d]  average training loss: %.4f" % (epoch, total_epoch_loss_train))

        if epoch % args.test_frequency == 0:
            # initialize loss accumulator
            test_loss = 0.
            # compute the loss over the entire test set
            for i, (x, _) in enumerate(test_loader):
                # if on GPU put mini-batch into CUDA memory
                if args.cuda:
                    x = x.cuda()
                # wrap the mini-batch in a PyTorch Variable
                x = Variable(x)
                # compute ELBO estimate and accumulate loss
                test_loss += svi.evaluate_loss(x)

                # pick three random test images from the first mini-batch and
                # visualize how well we're reconstructing them
                if i == 0:
                    if args.visdom_flag:
                        plot_vae_samples(vae, vis)
                        reco_indices = np.random.randint(0, x.size(0), 3)
                        for index in reco_indices:
                            test_img = x[index, :]
                            reco_img = vae.reconstruct_img(test_img)
                            vis.image(test_img.contiguous().view(28, 28).data.cpu().numpy(),
                                      opts={'caption': 'test image'})
                            vis.image(reco_img.contiguous().view(28, 28).data.cpu().numpy(),
                                      opts={'caption': 'reconstructed image'})

            # report test diagnostics
            normalizer_test = len(test_loader.dataset)
            total_epoch_loss_test = test_loss / normalizer_test
            test_elbo.append(total_epoch_loss_test)
            print("[epoch %03d]  average test loss: %.4f" % (epoch, total_epoch_loss_test))

        if epoch == args.tsne_iter:
            mnist_test_tsne(vae=vae, test_loader=test_loader)
            plot_llk(np.array(train_elbo), np.array(test_elbo))

    return vae
Example #47
0
    def _test_vectorized_map_data_in_elbo(self, n_superfluous_top, n_superfluous_bottom, n_steps):
        pyro.clear_param_store()
        self.data_tensor = Variable(torch.zeros(9, 2))
        for _out in range(self.n_outer):
            for _in in range(self.n_inner):
                self.data_tensor[3 * _out + _in, :] = self.data[_out][_in]

        def model():
            mu_latent = pyro.sample(
                    "mu_latent",
                    dist.Normal(self.mu0, torch.pow(self.lam0, -0.5), reparameterized=False))

            def obs_inner(i, _i, _x):
                for k in range(n_superfluous_top):
                    pyro.sample("z_%d_%d" % (i, k),
                                dist.Normal(ng_zeros(4 - i, 1), ng_ones(4 - i, 1), reparameterized=False))
                pyro.observe("obs_%d" % i, dist.normal, _x, mu_latent, torch.pow(self.lam, -0.5))
                for k in range(n_superfluous_top, n_superfluous_top + n_superfluous_bottom):
                    pyro.sample("z_%d_%d" % (i, k),
                                dist.Normal(ng_zeros(4 - i, 1), ng_ones(4 - i, 1), reparameterized=False))

            def obs_outer(i, x):
                pyro.map_data("map_obs_inner_%d" % i, x, lambda _i, _x:
                              obs_inner(i, _i, _x), batch_size=4 - i)

            pyro.map_data("map_obs_outer", [self.data_tensor[0:4, :], self.data_tensor[4:7, :],
                                            self.data_tensor[7:9, :]],
                          lambda i, x: obs_outer(i, x), batch_size=3)

            return mu_latent

        pt_mu_baseline = torch.nn.Linear(1, 1)
        pt_superfluous_baselines = []
        for k in range(n_superfluous_top + n_superfluous_bottom):
            pt_superfluous_baselines.extend([torch.nn.Linear(2, 4), torch.nn.Linear(2, 3),
                                             torch.nn.Linear(2, 2)])

        def guide():
            mu_q = pyro.param("mu_q", Variable(self.analytic_mu_n.data + 0.094 * torch.ones(2),
                                               requires_grad=True))
            log_sig_q = pyro.param("log_sig_q", Variable(
                                   self.analytic_log_sig_n.data - 0.11 * torch.ones(2), requires_grad=True))
            sig_q = torch.exp(log_sig_q)
            trivial_baseline = pyro.module("mu_baseline", pt_mu_baseline, tags="baseline")
            baseline_value = trivial_baseline(ng_ones(1))
            mu_latent = pyro.sample("mu_latent",
                                    dist.Normal(mu_q, sig_q, reparameterized=False),
                                    baseline=dict(baseline_value=baseline_value))

            def obs_inner(i, _i, _x):
                for k in range(n_superfluous_top + n_superfluous_bottom):
                    z_baseline = pyro.module("z_baseline_%d_%d" % (i, k),
                                             pt_superfluous_baselines[3 * k + i], tags="baseline")
                    baseline_value = z_baseline(mu_latent.detach()).unsqueeze(-1)
                    mean_i = pyro.param("mean_%d_%d" % (i, k),
                                        Variable(0.5 * torch.ones(4 - i, 1), requires_grad=True))
                    pyro.sample("z_%d_%d" % (i, k),
                                dist.Normal(mean_i, ng_ones(4 - i, 1), reparameterized=False),
                                baseline=dict(baseline_value=baseline_value))

            def obs_outer(i, x):
                pyro.map_data("map_obs_inner_%d" % i, x, lambda _i, _x:
                              obs_inner(i, _i, _x), batch_size=4 - i)

            pyro.map_data("map_obs_outer", [self.data_tensor[0:4, :], self.data_tensor[4:7, :],
                                            self.data_tensor[7:9, :]],
                          lambda i, x: obs_outer(i, x), batch_size=3)

            return mu_latent

        def per_param_callable(module_name, param_name, tags):
            if 'baseline' in tags:
                return {"lr": 0.010, "betas": (0.95, 0.999)}
            else:
                return {"lr": 0.0012, "betas": (0.95, 0.999)}

        adam = optim.Adam(per_param_callable)
        svi = SVI(model, guide, adam, loss="ELBO", trace_graph=True)

        for step in range(n_steps):
            svi.step()

            mu_error = param_abs_error("mu_q", self.analytic_mu_n)
            log_sig_error = param_abs_error("log_sig_q", self.analytic_log_sig_n)

            if n_superfluous_top > 0 or n_superfluous_bottom > 0:
                superfluous_errors = []
                for k in range(n_superfluous_top + n_superfluous_bottom):
                    mean_0_error = torch.sum(torch.pow(pyro.param("mean_0_%d" % k), 2.0))
                    mean_1_error = torch.sum(torch.pow(pyro.param("mean_1_%d" % k), 2.0))
                    mean_2_error = torch.sum(torch.pow(pyro.param("mean_2_%d" % k), 2.0))
                    superfluous_error = torch.max(torch.max(mean_0_error, mean_1_error), mean_2_error)
                    superfluous_errors.append(superfluous_error.data.cpu().numpy()[0])

            if step % 500 == 0 and self.verbose:
                print("mu error, log(sigma) error:  %.4f, %.4f" % (mu_error, log_sig_error))
                if n_superfluous_top > 0 or n_superfluous_bottom > 0:
                    print("superfluous error: %.4f" % np.max(superfluous_errors))

        self.assertEqual(0.0, mu_error, prec=0.04)
        self.assertEqual(0.0, log_sig_error, prec=0.05)
        if n_superfluous_top > 0 or n_superfluous_bottom > 0:
            self.assertEqual(0.0, np.max(superfluous_errors), prec=0.04)
Example #48
0
class SVILossCompute(LossCompute):
    """A simple loss compute and train function."""
    def __init__(self,
                 generator,
                 model,
                 guide,
                 optimizer,
                 optim_params,
                 elbo_type='TraceELBO',
                 num_particles=1,
                 eval=False,
                 step=1. / 30000.0,
                 aux_model=None,
                 aux_guide=None):
        optim = self.getOptimizer(optimizer, optim_params)
        elbo = self.getELBO(elbo_type, num_particles)
        criterion = SVI(model, guide, optim, loss=elbo)
        super(SVILossCompute, self).__init__(generator, criterion, optim)

        self.eval = eval
        self.guide = guide
        self.model = model
        self.kl_anneal = step
        self.step = step
        self.aux_criterion = None
        #hack to get only KL term
        self.model_no_obs = poutine.block(model, hide=["preds", 'lm_preds'])
        optim = self.getOptimizer(optimizer, optim_params)
        elbo = self.getELBO(elbo_type, num_particles)
        self.kl_eval_svi = SVI(self.model_no_obs, self.guide, optim, elbo)

        #aux model and guide are for calculating additional loss terms...
        if aux_model is not None and aux_guide is not None:
            print('setting aux loss, ')
            logging.info("setting aux loss")
            optim = self.getOptimizer(optimizer, optim_params)
            elbo = self.getELBO(elbo_type, num_particles)
            self.aux_criterion = SVI(aux_model, aux_guide, optim, loss=elbo)

        self.aux_guide = aux_guide
        self.aux_model = aux_model

    def setKLAnnealingSchedule(self, step_size, kl_anneal):
        """
            step_size: how much to increase weight of KL term at each step
            beta: current weight of kl term
        """
        self.step = step_size
        self.kl_anneal = kl_anneal

    def getKLAnnealingSchedule(self):
        return self.step, self.kl_anneal

    def getOptimizerStateDict(self):
        return self.criterion.optim.get_state()

    def setOptimizerStateDict(self, state_dict):
        return self.criterion.optim.set_state(state_dict)

    def getELBO(self, elbo_type, particles):
        if elbo_type == 'TraceELBO':
            return Trace_ELBO(num_particles=particles)
        elif elbo_type == "MeanFieldELBO":
            return TraceMeanField_ELBO(num_particles=particles)
        else:
            raise ValueError("{} ELBO not supported".format(elbo_type))

    def getOptimizer(self, optimizer, optim_params):
        if optimizer == 'clippedadam':
            return PyroOptim(ClippedAdam, optim_params)
        elif optimizer == 'adadelta':
            #not 100% on this but pretty sure ** "dereferences" the dictionary
            return Adadelta(optim_params)
        elif optimizer == 'clippedadadelta':
            #since it's custom, gotta set it up in the way Pyro expects
            return PyroOptim(ClippedAdadelta, optim_params)
        else:
            raise ValueError("{} optimizer not supported".format(optimizer))

    def __call__(self, src, trg, src_mask, trg_mask, src_lengths, trg_lengths,
                 trg_y, norm):
        #x = self.generator(x)
        kl_anneal = self.kl_anneal
        if self.eval:
            #you could also do .eval_loss or something but this allows a bit more probing of results
            with torch.no_grad():
                elbo = self.criterion.evaluate_loss(
                    src, trg, src_mask, trg_mask, src_lengths, trg_lengths,
                    trg_y) * norm
                kl_term = self.kl_eval_svi.evaluate_loss(
                    src, trg, src_mask, trg_mask, src_lengths, trg_lengths,
                    trg_y) * norm
                nll = elbo - kl_term

                def torch_item(x):
                    return x if isinstance(x, numbers.Number) else x.item()

            if self.aux_criterion is not None:
                aux_loss = self.aux_criterion.evaluate_loss(
                    src, trg, src_mask, trg_mask, src_lengths, trg_lengths,
                    trg_y)
            else:
                aux_loss = -1.0

            loss = {
                'elbo': elbo,
                'nll': nll,
                'approx_kl': kl_term,
                'aux_loss': aux_loss
            }

        else:
            loss = self.criterion.step(src, trg, src_mask, trg_mask,
                                       src_lengths, trg_lengths, trg_y,
                                       kl_anneal)
            if self.aux_criterion is not None:
                aux_loss = self.aux_criterion.step(src, trg, src_mask,
                                                   trg_mask, src_lengths,
                                                   trg_lengths, trg_y,
                                                   kl_anneal)
            loss = loss * norm
            self.kl_anneal = min(self.kl_anneal + self.step, 1.0)

        return loss
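The poutine.block(model, hide=[...]) trick above isolates the KL part of the ELBO by hiding the observed sites from the model trace, so evaluate_loss returns a Monte Carlo estimate of KL(q || p) alone. A self-contained, hedged illustration with a toy model and guide (the site names and distributions below are stand-ins, not the ones used by this class):

import torch
import pyro
import pyro.distributions as dist
from pyro import poutine
from pyro.infer import SVI, Trace_ELBO
from pyro.optim import Adam

def toy_model(data):
    z = pyro.sample("z", dist.Normal(0., 1.))
    with pyro.plate("data", len(data)):
        pyro.sample("preds", dist.Normal(z, 1.), obs=data)

def toy_guide(data):
    loc = pyro.param("loc", torch.tensor(0.))
    pyro.sample("z", dist.Normal(loc, 1.))

data = torch.randn(5)
model_no_obs = poutine.block(toy_model, hide=["preds"])
kl_svi = SVI(model_no_obs, toy_guide, Adam({"lr": 1e-3}), loss=Trace_ELBO())
approx_kl = kl_svi.evaluate_loss(data)  # single-sample estimate of KL(q || p)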
Example #49
0
def main(args):
    """ Train GAE """
    print("Using {} dataset".format(args.dataset_str))
    # Load data
    np.random.seed(1)
    adj, features = load_data(args.dataset_str)
    N, D = features.shape

    # Store original adjacency matrix (without diagonal entries)
    adj_orig = adj
    adj_train, train_edges, val_edges, val_edges_false, test_edges, test_edges_false = mask_test_edges(
        adj)

    # Some preprocessing
    adj_train_norm = preprocess_graph(adj_train)
    adj_train_norm = Variable(make_sparse(adj_train_norm))
    adj_train_labels = Variable(
        torch.FloatTensor(adj_train + sp.eye(adj_train.shape[0]).todense()))
    features = Variable(make_sparse(features))

    n_edges = adj_train_labels.sum()

    data = {
        'adj_norm': adj_train_norm,
        'adj_labels': adj_train_labels,
        'features': features,
    }

    gae = GAE(data,
              n_hidden=32,
              n_latent=16,
              dropout=args.dropout,
              subsampling=args.subsampling)

    optimizer = Adam({"lr": args.lr, "betas": (0.95, 0.999)})

    svi = SVI(gae.model, gae.guide, optimizer, loss="ELBO")

    # Results
    results = defaultdict(list)

    # Full batch training loop
    for epoch in range(args.num_epochs):
        # initialize loss accumulator
        epoch_loss = 0.
        # do ELBO gradient and accumulate loss
        epoch_loss += svi.step()
        # report training diagnostics
        if args.subsampling:
            normalized_loss = epoch_loss / float(2 * n_edges)
        else:
            normalized_loss = epoch_loss / (2 * N * N)

        results['train_elbo'].append(normalized_loss)

        # Training loss
        emb = gae.get_embeddings()
        accuracy, roc_curr, ap_curr, = eval_gae(val_edges, val_edges_false,
                                                emb, adj_orig)

        results['accuracy_train'].append(accuracy)
        results['roc_train'].append(roc_curr)
        results['ap_train'].append(ap_curr)

        print("Epoch:", '%04d' % (epoch + 1), "train_loss=",
              "{:.5f}".format(normalized_loss), "train_acc=",
              "{:.5f}".format(accuracy), "val_roc=", "{:.5f}".format(roc_curr),
              "val_ap=", "{:.5f}".format(ap_curr))

        # Test loss
        if epoch % args.test_freq == 0:
            emb = gae.get_embeddings()
            accuracy, roc_score, ap_score = eval_gae(test_edges,
                                                     test_edges_false, emb,
                                                     adj_orig)
            results['accuracy_test'].append(accuracy)
            results['roc_test'].append(roc_score)
            results['ap_test'].append(ap_score)

    print("Optimization Finished!")

    # Test loss
    emb = gae.get_embeddings()
    accuracy, roc_score, ap_score = eval_gae(test_edges, test_edges_false, emb,
                                             adj_orig)
    print('Test Accuracy: ' + str(accuracy))
    print('Test ROC score: ' + str(roc_score))
    print('Test AP score: ' + str(ap_score))

    # Plot
    plot_results(results,
                 args.test_freq,
                 path=args.dataset_str + "_results.png")
Example #50
0
from pyro.infer import Trace_ELBO
svi = SVI(model, guide, optimizer, loss=Trace_ELBO())  #, num_particles=7)

n_steps = 10 * 400000
# do gradient steps
for step in range(1, n_steps):
    if step % 100 == 1:
        print("DOING A STEP")
        print(".......")
        print(step)

#    quit()

#    for name in pyro.get_param_store().get_all_param_names():
#       print [name, pyro.param(name).data.numpy()]
    svi.step(corpus)

    if step % 2000 == 0:
        print("Saving")
        save_path = "../raw-results/"
        #save_path = "/afs/cs.stanford.edu/u/mhahn/scr/deps/"
        with open(
                "output/" + args.language + "_" + __file__ + "_model_" +
                str(myID) + ".tsv", "w") as outFile:
            print("\t".join(
                list(
                    map(str, [
                        "Counter", "Document", "DH_Mean_NoPunct",
                        "DH_Sigma_NoPunct", "Distance_Mean_NoPunct",
                        "Distance_Sigma_NoPunct", "Dependency"
                    ]))), file=outFile)
Example #51
0
# Initialize the SVI optimization class
my_svi = SVI(model=model_gamma,
             guide=my_guide,
             optim=ClippedAdam({"lr": 0.01, 'clip_norm': 1.0}),
             loss=Trace_ELBO())

losses = []

start_time = time.time()

# Perform optimization
for i in range(5000):

    loss = my_svi.step(X_train_torch,
                       y_train_torch,  
                       california.feature_names)
    
    normalized_loss = loss/X_train_torch.shape[0]
    
    # Tabulate the loss for plotting
    losses.append(normalized_loss)
    if (i % 250 == 0):
        print(f'iter: {i}, normalized loss: {round(normalized_loss, 2)}')
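The losses list tabulated above is presumably plotted afterwards; a minimal sketch, assuming matplotlib is the plotting backend:

import matplotlib.pyplot as plt

plt.plot(losses)
plt.xlabel("SVI step")
plt.ylabel("normalized ELBO loss")
plt.title("Training loss")
plt.show()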
        
        



Example #52
0

for epoch in range(30):
    print("Start epoch!")
    # initialize loss accumulator
    epoch_loss = 0.
    # do a training epoch over each mini-batch x
    # returned by the data loader
    for convo_i in range(dataset.size()):
        x, y = dataset.next_batch()

        #HACK for overfitting
        # y = [100, 30, 11, 1, 0, 24, 8, 4, 17, 11, 1, 6, 0, 9, 4, 8, 6, 24, 9, 1, 101]

        x = dataset.to_onehot(x, long_type=False)
        y = dataset.to_onehot(y, long_type=False)
        
        # do ELBO gradient and accumulate loss
        if USE_CUDA:
            loss = svi.step(x.cuda(), y.cuda(), convo_i)
        else:
            loss = svi.step(x, y, convo_i)
        epoch_loss += loss

        # print loss
        if convo_i % 10 == 0:
            print("Epoch: {}, Step: {}, NLL: {}".format(epoch, convo_i, loss))
            print("---------------------------\n")

    print("\n\nTrained epoch: {}, epoch loss: {}\n\n".format(epoch, epoch_loss))
Example #53
0
File: BNN2.py Project: CheChem/BNN
    lifted_module = pyro.random_module("module", net, priors)

    return lifted_module()


optim = Adam({"lr": 0.01})
svi = SVI(model, guide, optim, loss=Trace_ELBO())

num_iterations = 5
loss = 0

for j in range(1000):
    loss = 0
    for batch_id, data in enumerate(X_train):
        # calculate the loss and take a gradient step
        loss += svi.step(data[0].view(-1, 1), data[1])
    normalizer_train = len(train_loader.dataset)
    total_epoch_loss_train = loss / normalizer_train

    print("Epoch ", j, " Loss ", total_epoch_loss_train)

num_samples = 10


def predict(x):
    sampled_models = [guide(None, None) for _ in range(num_samples)]
    yhats = [model(x).data for model in sampled_models]
    mean = torch.mean(torch.stack(yhats), 0)
    return torch.argmax(mean, dim=1)

Example #54
0
svae = PhoneVAE(batch_size=1)
optimizer = Adam(ADAM_CONFIG)
svi = SVI(svae.model, svae.guide, optimizer, loss=Trace_ELBO())


"""
Train the model
"""
train_elbo = []
for e in range(NUM_EPOCHS):
    epoch_loss = 0.
    for string in TEST_STRINGS:
        # Pad input string differently than observed string so program doesn't get rewarded by making string short
        one_hot_string = strings_to_tensor([string], MAX_STRING_LEN)
        if CUDA:
            one_hot_string = one_hot_string.cuda()
        epoch_loss += svi.step(one_hot_string)
    if e % RECORD_EVERY == 0:
        avg_epoch_loss = epoch_loss/len(TEST_STRINGS)
        print(f"Epoch #{e} Average Loss: {avg_epoch_loss}")
        train_elbo.append(avg_epoch_loss)
        epoch_loss = 0



plt.plot(train_elbo)
plt.title("ELBO")
plt.xlabel("step")
plt.ylabel("loss")
plt.savefig(f"result/{SESSION_NAME}.png")
Example #55
0
File: scanvi.py Project: zeta1999/pyro
def main(args):
    # Fix random number seed
    pyro.util.set_rng_seed(args.seed)
    # Enable optional validation warnings
    pyro.enable_validation(True)

    # Load and pre-process data
    dataloader, num_genes, l_mean, l_scale, anndata = get_data(dataset=args.dataset, batch_size=args.batch_size,
                                                               cuda=args.cuda)

    # Instantiate instance of model/guide and various neural networks
    scanvi = SCANVI(num_genes=num_genes, num_labels=4, l_loc=l_mean, l_scale=l_scale,
                    scale_factor=1.0 / (args.batch_size * num_genes))

    if args.cuda:
        scanvi.cuda()

    # Setup an optimizer (Adam) and learning rate scheduler.
    # By default we start with a moderately high learning rate (0.005)
    # and reduce by a factor of 5 after 20 epochs.
    scheduler = MultiStepLR({'optimizer': Adam,
                             'optim_args': {'lr': args.learning_rate},
                             'milestones': [20],
                             'gamma': 0.2})

    # Tell Pyro to enumerate out y when y is unobserved
    guide = config_enumerate(scanvi.guide, "parallel", expand=True)

    # Setup a variational objective for gradient-based learning.
    # Note we use TraceEnum_ELBO in order to leverage Pyro's machinery
    # for automatic enumeration of the discrete latent variable y.
    elbo = TraceEnum_ELBO(strict_enumeration_warning=False)
    svi = SVI(scanvi.model, guide, scheduler, elbo)

    # Training loop
    for epoch in range(args.num_epochs):
        losses = []

        for x, y in dataloader:
            if y is not None:
                y = y.type_as(x)
            loss = svi.step(x, y)
            losses.append(loss)

        # Tell the scheduler we've done one epoch.
        scheduler.step()

        print("[Epoch %04d]  Loss: %.5f" % (epoch, np.mean(losses)))

    # Put neural networks in eval mode (needed for batchnorm)
    scanvi.eval()

    # Now that we're done training we'll inspect the latent representations we've learned
    if args.plot and args.dataset == 'pbmc':
        import scanpy as sc
        # Compute latent representation (z2_loc) for each cell in the dataset
        latent_rep = scanvi.z2l_encoder(dataloader.data_x)[0]

        # Compute inferred cell type probabilities for each cell
        y_logits = scanvi.classifier(latent_rep)
        y_probs = softmax(y_logits, dim=-1).data.cpu().numpy()

        # Use scanpy to compute 2-dimensional UMAP coordinates using our
        # learned 10-dimensional latent representation z2
        anndata.obsm["X_scANVI"] = latent_rep.data.cpu().numpy()
        sc.pp.neighbors(anndata, use_rep="X_scANVI")
        sc.tl.umap(anndata)
        umap1, umap2 = anndata.obsm['X_umap'][:, 0], anndata.obsm['X_umap'][:, 1]

        # Construct plots; all plots are scatterplots depicting the two-dimensional UMAP embedding
        # and only differ in how points are colored

        # The topmost plot depicts the 200 hand-curated seed labels in our dataset
        fig, axes = plt.subplots(3, 2)
        seed_marker_sizes = anndata.obs['seed_marker_sizes']
        axes[0, 0].scatter(umap1, umap2, s=seed_marker_sizes, c=anndata.obs['seed_colors'], marker='.', alpha=0.7)
        axes[0, 0].set_title('Hand-Curated Seed Labels')
        patch1 = Patch(color='lightcoral', label='CD8-Naive')
        patch2 = Patch(color='limegreen', label='CD4-Naive')
        patch3 = Patch(color='deepskyblue', label='CD4-Memory')
        patch4 = Patch(color='mediumorchid', label='CD4-Regulatory')
        axes[0, 1].legend(loc='center left', handles=[patch1, patch2, patch3, patch4])
        axes[0, 1].get_xaxis().set_visible(False)
        axes[0, 1].get_yaxis().set_visible(False)
        axes[0, 1].set_frame_on(False)

        # The remaining plots depict the inferred cell type probability for each of the four cell types
        s10 = axes[1, 0].scatter(umap1, umap2, s=1, c=y_probs[:, 0], marker='.', alpha=0.7)
        axes[1, 0].set_title('Inferred CD8-Naive probability')
        fig.colorbar(s10, ax=axes[1, 0])
        s11 = axes[1, 1].scatter(umap1, umap2, s=1, c=y_probs[:, 1], marker='.', alpha=0.7)
        axes[1, 1].set_title('Inferred CD4-Naive probability')
        fig.colorbar(s11, ax=axes[1, 1])
        s20 = axes[2, 0].scatter(umap1, umap2, s=1, c=y_probs[:, 2], marker='.', alpha=0.7)
        axes[2, 0].set_title('Inferred CD4-Memory probability')
        fig.colorbar(s20, ax=axes[2, 0])
        s21 = axes[2, 1].scatter(umap1, umap2, s=1, c=y_probs[:, 3], marker='.', alpha=0.7)
        axes[2, 1].set_title('Inferred CD4-Regulatory probability')
        fig.colorbar(s21, ax=axes[2, 1])

        fig.tight_layout()
        plt.savefig('scanvi.pdf')
Example #56
0
def main(args):
    # Init tensorboard
    writer = SummaryWriter('./runs/' + args.runname + str(args.trialnumber))
    model_name = 'VanillaDMM'

    # Set evaluation log file
    evaluation_logpath = './logs/{}/evaluation_result.log'.format(
        model_name.lower())
    log_evaluation(evaluation_logpath,
                   'Evaluation Trial - {}\n'.format(args.trialnumber))

    # Constants
    time_length = 30
    input_length_for_pred = 20
    pred_length = time_length - input_length_for_pred
    train_batch_size = 16
    valid_batch_size = 1

    # For model
    input_channels = 1
    z_channels = 50
    emission_channels = [64, 32]
    transition_channels = 64
    encoder_channels = [32, 64]
    rnn_input_dim = 256
    rnn_channels = 128
    kernel_size = 3
    pred_length = 0

    # Device checking
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda:0" if use_cuda else "cpu")

    # Make dataset
    logging.info("Generate data")
    train_datapath = args.datapath / 'train'
    valid_datapath = args.datapath / 'valid'
    train_dataset = DiffusionDataset(train_datapath)
    valid_dataset = DiffusionDataset(valid_datapath)

    # Create data loaders from pickle data
    logging.info("Generate data loaders")
    train_dataloader = DataLoader(
        train_dataset, batch_size=train_batch_size, shuffle=True, num_workers=8)
    valid_dataloader = DataLoader(
        valid_dataset, batch_size=valid_batch_size, num_workers=4)

    # Training parameters
    width = 100
    height = 100
    input_dim = width * height

    # Create model
    logging.warning("Generate model")
    logging.warning(input_dim)
    pred_input_dim = 10
    dmm = DMM(input_channels=input_channels, z_channels=z_channels, emission_channels=emission_channels,
              transition_channels=transition_channels, encoder_channels=encoder_channels, rnn_input_dim=rnn_input_dim, rnn_channels=rnn_channels, kernel_size=kernel_size, height=height, width=width, pred_input_dim=pred_input_dim, num_layers=1, rnn_dropout_rate=0.0,
              num_iafs=0, iaf_dim=50, use_cuda=use_cuda)

    # Initialize model
    logging.info("Initialize model")
    epochs = args.endepoch
    learning_rate = 0.0001
    beta1 = 0.9
    beta2 = 0.999
    clip_norm = 10.0
    lr_decay = 1.0
    weight_decay = 0
    adam_params = {"lr": learning_rate, "betas": (beta1, beta2),
                   "clip_norm": clip_norm, "lrd": lr_decay,
                   "weight_decay": weight_decay}
    adam = ClippedAdam(adam_params)
    elbo = Trace_ELBO()
    svi = SVI(dmm.model, dmm.guide, adam, loss=elbo)

    # saves the model and optimizer states to disk
    save_model = Path('./checkpoints/' + model_name)

    def save_checkpoint(epoch):
        save_dir = save_model / '{}.model'.format(epoch)
        save_opt_dir = save_model / '{}.opt'.format(epoch)
        logging.info("saving model to %s..." % save_dir)
        torch.save(dmm.state_dict(), save_dir)
        logging.info("saving optimizer states to %s..." % save_opt_dir)
        adam.save(save_opt_dir)
        logging.info("done saving model and optimizer checkpoints to disk.")

    # Starting epoch
    start_epoch = args.startepoch

    # loads the model and optimizer states from disk
    if start_epoch != 0:
        load_opt = './checkpoints/' + model_name + \
            '/e{}-i188-opt-tn{}.opt'.format(start_epoch - 1, args.trialnumber)
        load_model = './checkpoints/' + model_name + \
            '/e{}-i188-tn{}.pt'.format(start_epoch - 1, args.trialnumber)

        def load_checkpoint():
            # assert exists(load_opt) and exists(load_model), \
            #     "--load-model and/or --load-opt misspecified"
            logging.info("loading model from %s..." % load_model)
            dmm.load_state_dict(torch.load(load_model, map_location=device))
            # logging.info("loading optimizer states from %s..." % load_opt)
            # adam.load(load_opt)
            # logging.info("done loading model and optimizer states.")

        if load_model != '':
            logging.info('Load checkpoint')
            load_checkpoint()

    # Validation only?
    validation_only = args.validonly

    # Train the model
    if not validation_only:
        logging.info("Training model")
        annealing_epochs = 1000
        minimum_annealing_factor = 0.2
        N_train_size = 3000
        N_mini_batches = int(N_train_size / train_batch_size +
                             int(N_train_size % train_batch_size > 0))
        for epoch in tqdm(range(start_epoch, epochs), desc='Epoch', leave=True):
            r_loss_train = 0
            dmm.train(True)
            idx = 0
            mov_avg_loss = 0
            mov_data_len = 0
            for which_mini_batch, data in enumerate(tqdm(train_dataloader, desc='Train', leave=True)):
                if annealing_epochs > 0 and epoch < annealing_epochs:
                    # compute the KL annealing factor appropriate for the current mini-batch in the current epoch
                    min_af = minimum_annealing_factor
                    annealing_factor = min_af + (1.0 - min_af) * \
                        (float(which_mini_batch + epoch * N_mini_batches + 1) /
                         float(annealing_epochs * N_mini_batches))
                else:
                    # by default the KL annealing factor is unity
                    annealing_factor = 1.0

                data['observation'] = normalize(
                    data['observation'].unsqueeze(2).to(device))
                batch_size, length, _, w, h = data['observation'].shape
                data_reversed = reverse_sequences(data['observation'])
                data_mask = torch.ones(
                    batch_size, length, input_channels, w, h).cuda()

                loss = svi.step(data['observation'],
                                data_reversed, data_mask, annealing_factor)

                # Running losses
                mov_avg_loss += loss
                mov_data_len += batch_size

                r_loss_train += loss
                idx += 1

            # Average losses
            train_loss_avg = r_loss_train / (len(train_dataset) * time_length)
            writer.add_scalar('Loss/train', train_loss_avg, epoch)
            logging.info("Epoch: %d, Training loss: %1.5f",
                         epoch, train_loss_avg)

            # # Time to time evaluation
            if epoch == epochs - 1:
                for temp_pred_length in [20]:
                    r_loss_valid = 0
                    r_loss_loc_valid = 0
                    r_loss_scale_valid = 0
                    r_loss_latent_valid = 0
                    dmm.train(False)
                    val_pred_length = temp_pred_length
                    val_pred_input_length = 10
                    with torch.no_grad():
                        for i, data in enumerate(tqdm(valid_dataloader, desc='Eval', leave=True)):
                            data['observation'] = normalize(
                                data['observation'].unsqueeze(2).to(device))
                            batch_size, length, _, w, h = data['observation'].shape
                            data_reversed = reverse_sequences(
                                data['observation'])
                            data_mask = torch.ones(
                                batch_size, length, input_channels, w, h).cuda()

                            pred_tensor = data['observation'][:,
                                                              :input_length_for_pred, :, :, :]
                            pred_tensor_reversed = reverse_sequences(
                                pred_tensor)
                            pred_tensor_mask = torch.ones(
                                batch_size, input_length_for_pred, input_channels, w, h).cuda()

                            ground_truth = data['observation'][:,
                                                               input_length_for_pred:, :, :, :]

                            val_nll = svi.evaluate_loss(
                                data['observation'], data_reversed, data_mask)

                            preds, _, loss_loc, loss_scale = do_prediction_rep_inference(
                                dmm, pred_tensor_mask, val_pred_length, val_pred_input_length, data['observation'])

                            ground_truth = denormalize(
                                data['observation'].squeeze().cpu().detach()
                            )
                            pred_with_input = denormalize(
                                torch.cat(
                                    [data['observation'][:, :-val_pred_length, :, :, :].squeeze(),
                                     preds.squeeze()], dim=0
                                ).cpu().detach()
                            )

                            # Running losses
                            r_loss_valid += val_nll
                            r_loss_loc_valid += loss_loc
                            r_loss_scale_valid += loss_scale

                    # Average losses
                    valid_loss_avg = r_loss_valid / \
                        (len(valid_dataset) * time_length)
                    valid_loss_loc_avg = r_loss_loc_valid / \
                        (len(valid_dataset) * val_pred_length * width * height)
                    valid_loss_scale_avg = r_loss_scale_valid / \
                        (len(valid_dataset) * val_pred_length * width * height)
                    writer.add_scalar('Loss/test', valid_loss_avg, epoch)
                    writer.add_scalar(
                        'Loss/test_obs', valid_loss_loc_avg, epoch)
                    writer.add_scalar('Loss/test_scale',
                                      valid_loss_scale_avg, epoch)
                    logging.info("Validation loss: %1.5f", valid_loss_avg)
                    logging.info("Validation obs loss: %1.5f",
                                 valid_loss_loc_avg)
                    logging.info("Validation scale loss: %1.5f",
                                 valid_loss_scale_avg)
                    log_evaluation(evaluation_logpath, "Validation obs loss for {}s pred {}: {}\n".format(
                        val_pred_length, args.trialnumber, valid_loss_loc_avg))
                    log_evaluation(evaluation_logpath, "Validation scale loss for {}s pred {}: {}\n".format(
                        val_pred_length, args.trialnumber, valid_loss_scale_avg))

            # Save model
            if epoch % 50 == 0 or epoch == epochs - 1:
                torch.save(dmm.state_dict(), args.modelsavepath / model_name /
                           'e{}-i{}-tn{}.pt'.format(epoch, idx, args.trialnumber))
                adam.save(args.modelsavepath / model_name /
                          'e{}-i{}-opt-tn{}.opt'.format(epoch, idx, args.trialnumber))

    # Last validation after training
    test_samples_indices = range(100)
    total_n = 0
    if validation_only:
        r_loss_loc_valid = 0
        r_loss_scale_valid = 0
        r_loss_latent_valid = 0
        dmm.train(False)
        val_pred_length = args.validpredlength
        val_pred_input_length = 10
        with torch.no_grad():
            for i in tqdm(test_samples_indices, desc='Valid', leave=True):
                # Data processing
                data = valid_dataset[i]
                if torch.isnan(torch.sum(data['observation'])):
                    print("Skip {}".format(i))
                    continue
                else:
                    total_n += 1
                data['observation'] = normalize(
                    data['observation'].unsqueeze(0).unsqueeze(2).to(device))
                batch_size, length, _, w, h = data['observation'].shape
                data_reversed = reverse_sequences(data['observation'])
                data_mask = torch.ones(
                    batch_size, length, input_channels, w, h).to(device)

                # Prediction
                pred_tensor_mask = torch.ones(
                    batch_size, input_length_for_pred, input_channels, w, h).to(device)
                preds, _, loss_loc, loss_scale = do_prediction_rep_inference(
                    dmm, pred_tensor_mask, val_pred_length, val_pred_input_length, data['observation'])

                ground_truth = denormalize(
                    data['observation'].squeeze().cpu().detach()
                )
                pred_with_input = denormalize(
                    torch.cat(
                        [data['observation'][:, :-val_pred_length, :, :, :].squeeze(),
                         preds.squeeze()], dim=0
                    ).cpu().detach()
                )

                # Save samples
                if i < 5:
                    save_dir_samples = Path('./samples/more_variance_long')
                    with open(save_dir_samples / '{}-gt-test.pkl'.format(i), 'wb') as fout:
                        pickle.dump(ground_truth, fout)
                    with open(save_dir_samples / '{}-vanilladmm-pred-test.pkl'.format(i), 'wb') as fout:
                        pickle.dump(pred_with_input, fout)

                # Running losses
                r_loss_loc_valid += loss_loc
                r_loss_scale_valid += loss_scale
                r_loss_latent_valid += np.sum((preds.squeeze().detach().cpu().numpy(
                ) - data['latent'][time_length - val_pred_length:, :, :].detach().cpu().numpy()) ** 2)

        # Average losses
        test_samples_indices = range(total_n)
        print(total_n)
        valid_loss_loc_avg = r_loss_loc_valid / \
            (total_n * val_pred_length * width * height)
        valid_loss_scale_avg = r_loss_scale_valid / \
            (total_n * val_pred_length * width * height)
        valid_loss_latent_avg = r_loss_latent_valid / \
            (total_n * val_pred_length * width * height)
        logging.info("Validation obs loss for %ds pred VanillaDMM: %f",
                     val_pred_length, valid_loss_loc_avg)
        logging.info("Validation latent loss: %f", valid_loss_latent_avg)

        with open('VanillaDMMResult.log', 'a+') as fout:
            validation_log = 'Pred {}s VanillaDMM: {}\n'.format(
                val_pred_length, valid_loss_loc_avg)
            fout.write(validation_log)
Example #57
0
def test_elbo_mapdata(map_type, batch_size, n_steps, lr):
    # normal-normal: known covariance
    lam0 = torch.tensor([0.1, 0.1])  # precision of prior
    loc0 = torch.tensor([0.0, 0.5])  # prior mean
    # known precision of observation noise
    lam = torch.tensor([6.0, 4.0])
    data = []
    sum_data = torch.zeros(2)

    def add_data_point(x, y):
        data.append(torch.tensor([x, y]))
        sum_data.data.add_(data[-1].data)

    add_data_point(0.1, 0.21)
    add_data_point(0.16, 0.11)
    add_data_point(0.06, 0.31)
    add_data_point(-0.01, 0.07)
    add_data_point(0.23, 0.25)
    add_data_point(0.19, 0.18)
    add_data_point(0.09, 0.41)
    add_data_point(-0.04, 0.17)

    data = torch.stack(data)
    n_data = torch.tensor([float(len(data))])
    analytic_lam_n = lam0 + n_data.expand_as(lam) * lam
    analytic_log_sig_n = -0.5 * torch.log(analytic_lam_n)
    analytic_loc_n = sum_data * (lam / analytic_lam_n) + loc0 * (
        lam0 / analytic_lam_n)

    logger.debug("DOING ELBO TEST [bs = {}, map_type = {}]".format(
        batch_size, map_type))
    pyro.clear_param_store()

    def model():
        loc_latent = pyro.sample(
            "loc_latent",
            dist.Normal(loc0, torch.pow(lam0, -0.5)).to_event(1))
        if map_type == "iplate":
            for i in pyro.plate("aaa", len(data), batch_size):
                pyro.sample(
                    "obs_%d" % i,
                    dist.Normal(loc_latent, torch.pow(lam, -0.5)).to_event(1),
                    obs=data[i],
                ),
        elif map_type == "plate":
            with pyro.plate("aaa", len(data), batch_size) as ind:
                pyro.sample(
                    "obs",
                    dist.Normal(loc_latent, torch.pow(lam, -0.5)).to_event(1),
                    obs=data[ind],
                ),
        else:
            for i, x in enumerate(data):
                pyro.sample(
                    "obs_%d" % i,
                    dist.Normal(loc_latent, torch.pow(lam, -0.5)).to_event(1),
                    obs=x,
                )
        return loc_latent

    def guide():
        loc_q = pyro.param(
            "loc_q",
            analytic_loc_n.detach().clone() + torch.tensor([-0.18, 0.23]))
        log_sig_q = pyro.param(
            "log_sig_q",
            analytic_log_sig_n.detach().clone() - torch.tensor([-0.18, 0.23]),
        )
        sig_q = torch.exp(log_sig_q)
        pyro.sample("loc_latent", dist.Normal(loc_q, sig_q).to_event(1))
        if map_type == "iplate" or map_type is None:
            for i in pyro.plate("aaa", len(data), batch_size):
                pass
        elif map_type == "plate":
            # dummy plate to do subsampling for observe
            with pyro.plate("aaa", len(data), batch_size):
                pass
        else:
            pass

    adam = optim.Adam({"lr": lr})
    svi = SVI(model, guide, adam, loss=TraceGraph_ELBO())

    for k in range(n_steps):
        svi.step()

        loc_error = torch.sum(
            torch.pow(analytic_loc_n - pyro.param("loc_q"), 2.0))
        log_sig_error = torch.sum(
            torch.pow(analytic_log_sig_n - pyro.param("log_sig_q"), 2.0))

        if k % 500 == 0:
            logger.debug("errors - {}, {}".format(loc_error, log_sig_error))

    assert_equal(loc_error.item(), 0, prec=0.05)
    assert_equal(log_sig_error.item(), 0, prec=0.06)
Example #58
0
    def test_nested_list_map_data_in_elbo(self, n_steps=4000):
        pyro.clear_param_store()

        def model():
            mu_latent = pyro.sample(
                    "mu_latent",
                    dist.Normal(self.mu0, torch.pow(self.lam0, -0.5), reparameterized=False))

            def obs_outer(i, x):
                pyro.map_data("map_obs_inner_%d" % i, x, lambda _i, _x:
                              obs_inner(i, _i, _x), batch_size=3)

            def obs_inner(i, _i, _x):
                pyro.observe("obs_%d_%d" % (i, _i), dist.normal, _x, mu_latent,
                             torch.pow(self.lam, -0.5))

            pyro.map_data("map_obs_outer", self.data, lambda i, x:
                          obs_outer(i, x), batch_size=3)

            return mu_latent

        def guide():
            mu_q = pyro.param("mu_q", Variable(self.analytic_mu_n.data + 0.234 * torch.ones(2),
                                               requires_grad=True))
            log_sig_q = pyro.param("log_sig_q", Variable(
                                   self.analytic_log_sig_n.data - 0.27 * torch.ones(2),
                                   requires_grad=True))
            sig_q = torch.exp(log_sig_q)
            mu_latent = pyro.sample(
                    "mu_latent",
                    dist.Normal(mu_q, sig_q, reparameterized=False),
                    baseline=dict(use_decaying_avg_baseline=True))

            def obs_outer(i, x):
                pyro.map_data("map_obs_inner_%d" % i, x, lambda _i, _x:
                              None, batch_size=3)

            pyro.map_data("map_obs_outer", self.data, lambda i, x:
                          obs_outer(i, x), batch_size=3)

            return mu_latent

        guide_trace = pyro.poutine.trace(guide, graph_type="dense").get_trace()
        model_trace = pyro.poutine.trace(pyro.poutine.replay(model, guide_trace),
                                         graph_type="dense").get_trace()
        assert len(model_trace.edges()) == 27
        assert len(model_trace.nodes()) == 16
        assert len(guide_trace.edges()) == 0
        assert len(guide_trace.nodes()) == 9

        adam = optim.Adam({"lr": 0.0008, "betas": (0.96, 0.999)})
        svi = SVI(model, guide, adam, loss="ELBO", trace_graph=True)

        for k in range(n_steps):
            svi.step()

            mu_error = param_mse("mu_q", self.analytic_mu_n)
            log_sig_error = param_mse("log_sig_q", self.analytic_log_sig_n)
            if k % 500 == 0 and self.verbose:
                print("mu error, log(sigma) error:  %.4f, %.4f" % (mu_error, log_sig_error))

        self.assertEqual(0.0, mu_error, prec=0.04)
        self.assertEqual(0.0, log_sig_error, prec=0.04)
Example #59
0
            [["mean", "std", "5%", "25%", "50%", "75%", "95%"]]
    return site_stats

# Prepare training data
train = torch.tensor(df.values, dtype=torch.float)

svi = SVI(model,
          guide,
          optim.Adam({"lr": .005}),
          loss=Trace_ELBO(),
          num_samples=1000)
x_data, y_data = train[:, :-1], train[:, 2]
pyro.clear_param_store()
num_iters = 8000 if not smoke_test else 2
for i in range(num_iters):
    elbo = svi.step(x_data, y_data)
    if i % 500 == 0:
        logging.info("Elbo loss: {}".format(elbo))

posterior = svi.run(x_data, y_data)   

sites = ["a", "bA", "bR", "bAR", "sigma"]

for site, values in summary(posterior, sites).items():
    print("Site: {}".format(site))
    print(values, "\n")


def wrapped_model(x_data, y_data):
    pyro.sample("prediction", dist.Delta(model(x_data, y_data)))
Example #60
0
# setup visdom for visualization
if args.visdom_flag:
    vis = visdom.Visdom()

train_elbo = []
test_elbo = []
# training loop
for epoch in range(args.num_epochs):
    # initialize loss accumulator
    epoch_loss = 0.
    # do a training epoch over each mini-batch x returned
    # by the data loader
    for x,_ in loader:
        # do ELBO gradient and accumulate loss
        epoch_loss += svi.step(x)

    # report training diagnostics
    normalizer_train = len(train_loader.dataset)
    total_epoch_loss_train = epoch_loss / normalizer_train
    train_elbo.append(total_epoch_loss_train)
    print("[epoch %03d]  average training loss: %.4f" % (epoch, total_epoch_loss_train))

    torch.save(vae.state_dict(), args.model_file)

#     if epoch % args.test_frequency == 0:
#         # initialize loss accumulator
#         test_loss = 0.
#         # compute the loss over the entire test set
#         for i, (x, _) in enumerate(test_loader):
#             # if on GPU put mini-batch into CUDA memory