Example #1
def main(args):
    logging.info('Generating data')
    pyro.set_rng_seed(0)
    pyro.clear_param_store()
    pyro.enable_validation(__debug__)

    # We can generate synthetic data directly by calling the model.
    true_topic_weights, true_topic_words, data = model(args=args)

    # We'll train using SVI.
    logging.info('-' * 40)
    logging.info('Training on {} documents'.format(args.num_docs))
    predictor = make_predictor(args)
    guide = functools.partial(parametrized_guide, predictor)
    Elbo = JitTraceEnum_ELBO if args.jit else TraceEnum_ELBO
    elbo = Elbo(max_plate_nesting=2)
    optim = ClippedAdam({'lr': args.learning_rate})
    svi = SVI(model, guide, optim, elbo)
    logging.info('Step\tLoss')
    for step in range(args.num_steps):
        loss = svi.step(data, args=args, batch_size=args.batch_size)
        if step % 10 == 0:
            logging.info('{: >5d}\t{}'.format(step, loss))
    loss = elbo.loss(model, guide, data, args=args)
    logging.info('final loss = {}'.format(loss))
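
Example #1 builds its training set simply by calling model(args=args): a Pyro sample site whose obs argument is None just draws a value, so running the model with no data returns synthetic data. A minimal toy sketch of that pattern (not the LDA model used above):

import pyro
import pyro.distributions as dist

def toy_model(num_points=5):
    # with no obs= arguments supplied, every site samples a fresh value
    rate = pyro.sample("rate", dist.Gamma(2.0, 2.0))
    with pyro.plate("data", num_points):
        return pyro.sample("obs", dist.Poisson(rate))

synthetic = toy_model()  # a tensor of 5 draws from the prior predictive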
Example #2
    def fit(self, model_name, model_param_names, data_input, init_values=None):
        # verbose is passed through from orbit.models.base_estimator
        verbose = self.verbose
        message = self.message
        learning_rate = self.learning_rate
        learning_rate_total_decay = self.learning_rate_total_decay
        num_sample = self.num_sample
        seed = self.seed
        num_steps = self.num_steps

        pyro.set_rng_seed(seed)
        Model = get_pyro_model(model_name)  # abstract
        model = Model(data_input)  # concrete

        # Perform stochastic variational inference using an auto guide.
        pyro.clear_param_store()
        guide = AutoLowRankMultivariateNormal(model)
        optim = ClippedAdam({
            "lr": learning_rate,
            "lrd": learning_rate_total_decay**(1 / num_steps)
        })
        elbo = Trace_ELBO(num_particles=self.num_particles,
                          vectorize_particles=True)
        svi = SVI(model, guide, optim, elbo)

        for step in range(num_steps):
            loss = svi.step()
            if verbose and step % message == 0:
                scale_rms = guide._loc_scale()[1].detach().pow(
                    2).mean().sqrt().item()
                print("step {: >4d} loss = {:0.5g}, scale = {:0.5g}".format(
                    step, loss, scale_rms))

        # Extract samples.
        vectorize = pyro.plate("samples",
                               num_sample,
                               dim=-1 - model.max_plate_nesting)
        with pyro.poutine.trace() as tr:
            samples = vectorize(guide)()
        with pyro.poutine.replay(trace=tr.trace):
            samples.update(vectorize(model)())

        # Convert from torch.Tensors to numpy.ndarrays.
        extract = {
            name: value.detach().squeeze().numpy()
            for name, value in samples.items()
        }

        # make sure that model param names are a subset of the extracted keys
        invalid_model_param = set(model_param_names) - set(list(
            extract.keys()))
        if invalid_model_param:
            raise EstimatorException(
                "Stan model definition does not contain required parameters")

        # the traced guide/model samples include every defined parameter,
        # so filter out unnecessary keys
        extract = {param: extract[param] for param in model_param_names}

        return extract
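
The lrd argument used above is ClippedAdam's per-step learning-rate decay factor, so raising the desired total decay to the power 1 / num_steps makes the learning rate end at lr * learning_rate_total_decay after num_steps steps. A quick sanity check with made-up numbers:

import math

lr, total_decay, num_steps = 0.1, 0.1, 1000   # hypothetical values
lrd = total_decay ** (1 / num_steps)          # per-step decay factor
final_lr = lr * lrd ** num_steps              # learning rate after num_steps steps
assert math.isclose(final_lr, lr * total_decay, rel_tol=1e-9)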
Example #3
    def fit(
        self,
        x,
        t,
        y,
        num_epochs=100,
        batch_size=100,
        learning_rate=1e-3,
        learning_rate_decay=0.1,
        weight_decay=1e-4,
        log_every=100,
    ):
        """
        Train using :class:`~pyro.infer.svi.SVI` with the
        :class:`TraceCausalEffect_ELBO` loss.

        :param ~torch.Tensor x: covariate features of shape ``(num_data, feature_dim)``.
        :param ~torch.Tensor t: binary treatment assignments of shape ``(num_data,)``.
        :param ~torch.Tensor y: observed outcomes of shape ``(num_data,)``.
        :param int num_epochs: Number of training epochs. Defaults to 100.
        :param int batch_size: Batch size. Defaults to 100.
        :param float learning_rate: Learning rate. Defaults to 1e-3.
        :param float learning_rate_decay: Learning rate decay over all epochs;
            the per-step decay rate will depend on batch size and number of epochs
            such that the initial learning rate will be ``learning_rate`` and the final
            learning rate will be ``learning_rate * learning_rate_decay``.
            Defaults to 0.1.
        :param float weight_decay: Weight decay. Defaults to 1e-4.
        :param int log_every: Log loss each this-many steps. If zero,
            do not log loss. Defaults to 100.
        :return: list of epoch losses
        """
        assert x.dim() == 2 and x.size(-1) == self.feature_dim
        assert t.shape == x.shape[:1]
        assert y.shape == x.shape[:1]
        self.whiten = PreWhitener(x)

        dataset = TensorDataset(x, t, y)
        dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
        logger.info("Training with {} minibatches per epoch".format(
            len(dataloader)))
        num_steps = num_epochs * len(dataloader)
        optim = ClippedAdam({
            "lr": learning_rate,
            "weight_decay": weight_decay,
            "lrd": learning_rate_decay**(1 / num_steps),
        })
        svi = SVI(self.model, self.guide, optim, TraceCausalEffect_ELBO())
        losses = []
        for epoch in range(num_epochs):
            for x, t, y in dataloader:
                x = self.whiten(x)
                loss = svi.step(x, t, y, size=len(dataset)) / len(dataset)
                if log_every and len(losses) % log_every == 0:
                    logger.debug("step {: >5d} loss = {:0.6g}".format(
                        len(losses), loss))
                assert not torch_isnan(loss)
                losses.append(loss)
        return losses
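
The fit() above follows the training loop of pyro.contrib.cevae.CEVAE. A hypothetical call with small synthetic tensors, assuming that class and its default Bernoulli outcome model:

import torch
from pyro.contrib.cevae import CEVAE

num_data, feature_dim = 100, 5
x = torch.randn(num_data, feature_dim)              # covariates
t = torch.bernoulli(torch.full((num_data,), 0.5))   # binary treatments
y = torch.bernoulli(torch.full((num_data,), 0.5))   # binary outcomes

cevae = CEVAE(feature_dim=feature_dim)
losses = cevae.fit(x, t, y, num_epochs=2, batch_size=50, log_every=0)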
Example #4
    def fit(self,
            model_name,
            model_param_names,
            data_input,
            fitter=None,
            init_values=None):
        verbose = self.verbose
        message = self.message
        learning_rate = self.learning_rate
        seed = self.seed
        num_steps = self.num_steps
        learning_rate_total_decay = self.learning_rate_total_decay

        pyro.set_rng_seed(seed)
        if fitter is None:
            fitter = get_pyro_model(model_name)  # abstract
        model = fitter(data_input)  # concrete

        # Perform MAP inference using an AutoDelta guide.
        pyro.clear_param_store()
        guide = AutoDelta(model)
        optim = ClippedAdam({
            "lr": learning_rate,
            "lrd": learning_rate_total_decay**(1 / num_steps),
            "betas": (0.5, 0.8)
        })
        elbo = Trace_ELBO()
        loss_elbo = list()
        svi = SVI(model, guide, optim, elbo)
        for step in range(num_steps):
            loss = svi.step()
            loss_elbo.append(loss)
            if verbose and step % message == 0:
                print("step {: >4d} loss = {:0.5g}".format(step, loss))

        # Extract point estimates.
        values = guide()
        values.update(pyro.poutine.condition(model, values)())

        # Convert from torch.Tensors to numpy.ndarrays.
        extract = {
            name: value.detach().numpy()
            for name, value in values.items()
        }

        # make sure that model param names are a subset of the extracted keys
        invalid_model_param = set(model_param_names) - set(list(
            extract.keys()))
        if invalid_model_param:
            raise EstimatorException(
                "Pyro model definition does not contain required parameters")

        # the conditioned model call returns every defined parameter;
        # filter out unnecessary keys
        posteriors = {param: extract[param] for param in model_param_names}
        training_metrics = {'loss_elbo': np.array(loss_elbo)}

        return posteriors, training_metrics
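
For reference, the MAP pattern used above (an AutoDelta guide optimized with Trace_ELBO, then point estimates read off by calling the guide) in a self-contained toy form; this is a minimal sketch, not orbit's estimator:

import torch
import pyro
import pyro.distributions as dist
from pyro.infer import SVI, Trace_ELBO
from pyro.infer.autoguide import AutoDelta
from pyro.optim import ClippedAdam

def toy_model():
    loc = pyro.sample("loc", dist.Normal(0., 10.))
    pyro.sample("obs", dist.Normal(loc, 1.), obs=torch.tensor(2.5))

pyro.clear_param_store()
guide = AutoDelta(toy_model)
svi = SVI(toy_model, guide, ClippedAdam({"lr": 0.1}), Trace_ELBO())
for _ in range(500):
    svi.step()
print(guide())  # {'loc': tensor(~2.5)}, the MAP point estimate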
Example #5
def main(args):
    """
    run inference for CVAE
    :param args: arguments for CVAE
    :return: None
    """
    if args.seed is not None:
        set_seed(args.seed, args.cuda)

    if os.path.exists('cvae.model.pt'):
        print('Loading model %s' % 'cvae.model.pt')
        cvae = torch.load('cvae.model.pt')

    else:

        cvae = CVAE(z_dim=args.z_dim,
                    y_dim=8,
                    x_dim=32612,
                    hidden_dim=args.hidden_dimension,
                    use_cuda=args.cuda)

    print(cvae)

    # setup the optimizer
    adam_params = {
        "lr": args.learning_rate,
        "betas": (args.beta_1, 0.999),
        "clip_norm": 0.5
    }
    optimizer = ClippedAdam(adam_params)
    guide = config_enumerate(cvae.guide, args.enum_discrete)

    # set up the loss for inference.
    loss = SVI(cvae.model,
               guide,
               optimizer,
               loss=TraceEnum_ELBO(max_iarange_nesting=1))

    try:
        # setup the logger if a filename is provided
        logger = open(args.logfile, "w") if args.logfile else None

        data_loaders = setup_data_loaders(NHANES, args.cuda, args.batch_size)
        print(len(data_loaders['prediction']))

        #torch.save(cvae, 'cvae.model.pt')

        mu, sigma, actuals, lods, masks = get_predictions(
            data_loaders["prediction"], cvae.sim_measurements)

        torch.save((mu, sigma, actuals, lods, masks), 'cvae.predictions.pt')

    finally:
        # close the logger file object if we opened it earlier
        if args.logfile:
            logger.close()
Example #6
def make_svi(model,
             guide,
             args=None,
             kwargs=None,
             steps=1000,
             lr=0.05,
             cut_time=slice(None, None),
             max_steps=2000,
             ensure_convergence=False,
             loss='ELBO'):
    adam_params = {
        "lr": lr,
        "betas": (0.90, 0.999),
        'weight_decay': 0.005,
        'clip_norm': 10
    }
    optimizer = ClippedAdam(adam_params)

    #     svi = SVI(model, guide, optimizer, loss=trace_mle(cut_time))
    if loss == 'ELBO':
        svi = SVI(model, guide, optimizer, loss=JitTraceEnum_ELBO())
    if loss == 'MLE':
        svi = SVI(model, guide, optimizer, loss=trace_mle())

    pbar = tqdm(range(1, steps + 1))
    time_start = 0

    loss_arr = []

    for i in pbar:
        loss, time_start = make_step(svi, pbar, time_start, args, kwargs)
        loss_arr.append(loss)

    while ensure_convergence:
        std_prev = np.std(loss_arr[-20:-1])
        mean_cur = np.mean(loss_arr[-100:])
        mean_prev = np.mean(loss_arr[-200:-100])
        prob = stat.norm(mean_prev, std_prev).cdf(mean_cur)
        #         print(prob, mean_cur, mean_prev, std_prev)
        if mean_cur < mean_prev and prob < 0.05 and len(loss_arr) < max_steps:
            pbar = tqdm(range(1, 100 + 1), leave=False)
            for j in pbar:
                loss, time_start = make_step(svi,
                                             pbar,
                                             time_start,
                                             args,
                                             kwargs,
                                             prefix='Extra: ')
                loss_arr.append(loss)
        else:
            break

    return loss
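
The ensure_convergence block above keeps training only while the mean loss over the most recent 100 steps is significantly below the mean of the 100 steps before it, under a normal approximation (assuming stat is scipy.stats). With hypothetical numbers:

import scipy.stats as stat

mean_prev, std_prev, mean_cur = 1050.0, 12.0, 1020.0  # made-up loss statistics
prob = stat.norm(mean_prev, std_prev).cdf(mean_cur)   # P(loss <= mean_cur) under the previous regime
print(prob < 0.05)  # True: the drop is significant, so extra steps would be taken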
Example #7
def train(args):
    model = Model(args.dim, 2 * args.rank)
    guide = AutoLowRankMultivariateNormal(model,
                                          rank=args.rank,
                                          init_scale=0.01)
    optim = ClippedAdam({"lr": args.learning_rate})
    elbo = Trace_ELBO()
    svi = SVI(model, guide, optim, elbo)
    losses = []
    for step in range(args.num_steps):
        loss = svi.step() / args.dim
        losses.append(loss)
        if step % 100 == 0:
            print("step {: >4} loss = {:0.8g}".format(step, loss))
Example #8
def test_broken_plates_smoke(backend):
    def model():
        with pyro.plate("i", 2):
            a = pyro.sample("a", dist.Normal(0, 1))
        pyro.sample("b", dist.Normal(a.mean(-1), 1), obs=torch.tensor(0.0))

    guide = AutoGaussian(model, backend=backend)
    svi = SVI(model, guide, ClippedAdam({"lr": 1e-8}), Trace_ELBO())
    for step in range(2):
        with xfail_if_not_implemented():
            svi.step()
    guide()
    predictive = Predictive(model, guide=guide, num_samples=2)
    predictive()
Example #9
def test_pyrocov_smoke(model, Guide, backend):
    T, P, S, F = 3, 4, 5, 6
    dataset = {
        "features": torch.randn(S, F),
        "local_time": torch.randn(T, P),
        "weekly_strains": torch.randn(T, P, S).exp().round(),
    }

    guide = Guide(model, backend=backend)
    svi = SVI(model, guide, ClippedAdam({"lr": 1e-8}), Trace_ELBO())
    for step in range(2):
        with xfail_if_not_implemented():
            svi.step(dataset)
    guide(dataset)
    predictive = Predictive(model, guide=guide, num_samples=2)
    predictive(dataset)
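
Both smoke tests finish by drawing from Predictive, which samples latent sites from the guide and replays them through the model. A minimal standalone sketch of that pattern on a toy model:

import torch
import pyro
import pyro.distributions as dist
from pyro.infer import Predictive
from pyro.infer.autoguide import AutoNormal

def model():
    a = pyro.sample("a", dist.Normal(0., 1.))
    pyro.sample("b", dist.Normal(a, 1.), obs=torch.tensor(0.5))

guide = AutoNormal(model)
guide()  # initialize the guide's parameters
samples = Predictive(model, guide=guide, num_samples=100, return_sites=["a"])()
print(samples["a"].shape)  # (100,) or (100, 1), depending on Pyro's sample reshaping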
Example #10
def main(args):
    pyro.set_rng_seed(0)
    pyro.clear_param_store()
    pyro.enable_validation(__debug__)

    # Loading data
    corpora = prepro_file_load("corpora")
    documents = list(prepro_file_load("id2pre_text").values())
    documents = [re.sub(r"[\[\]',]", "", doc).split() for doc in documents]
    data = [torch.tensor(list(filter(lambda a: a != -1, corpora.doc2idx(doc))), dtype=torch.int64) for doc in documents]
    N = list(map(len, data))
    args.num_words = len(corpora)
    args.num_docs = len(data)

    # We'll train using SVI.
    logging.info('Training on {} documents'.format(args.num_docs))
    predictor = make_predictor(args)
    guide = functools.partial(parametrized_guide, predictor)
    Elbo = JitTraceEnum_ELBO if args.jit else Trace_ELBO
    elbo = Elbo(max_plate_nesting=2)
    optim = ClippedAdam({'lr': args.learning_rate})
    svi = SVI(model, guide, optim, elbo)

    losses = []

    logging.info('Step\tLoss')
    for step in tqdm(range(args.num_steps)):
        loss = svi.step(data, N, args=args)
        losses.append(loss)
        if step % 10 == 0:
            # logging.info('{: >5d}\t{}'.format(step, loss))
            logging.info(f"Loss: {loss}")
    loss = elbo.loss(model, guide, data, N, args=args)
    logging.info('final loss = {}'.format(loss))

    # Plot loss over iterations
    plt.plot(losses)
    plt.title("ELBO")
    plt.xlabel("step")
    plt.ylabel("loss")
    plot_file_name = "../loss-2017_variable-sizes_only-word-data.png"
    plt.savefig(plot_file_name)
    plt.show()

    # save model
    torch.save({"model": predictor.state_dict(), "guide": guide}, "../mymodel.pt")
    pyro.get_param_store().save("mymodelparams.pt")
Example #11
def train_model(pmf):
    def model(data):
        # sample f from the prior
        # Probabilities are generated by the pmf
        f = pyro.sample("latent_fairness", pmf)
        f2 = dist.Bernoulli(f)
        for i in range(len(data)):
            s = pyro.sample("obs_{}".format(i), f2, obs=data[i])

    def guide(data):
        alpha_q = pyro.param("alpha_q",
                             torch.tensor(15.0),
                             constraint=constraints.positive)
        beta_q = pyro.param("beta_q",
                            torch.tensor(15.0),
                            constraint=constraints.positive)
        # sample latent_fairness from the distribution Beta(alpha_q, beta_q)
        pyro.sample("latent_fairness", dist.Beta(alpha_q, beta_q))

    adam_params = {"lr": 0.0005, "betas": (0.90, 0.999)}
    optimizer = ClippedAdam(adam_params)

    svi = SVI(model, guide, optimizer, loss=Trace_ELBO())

    for step in range(n_steps):
        loss = svi.step(data)
        if step % 100 == 0:
            logging.info(".")
            logging.info("Elbo loss: {}".format(loss))

    # grab the learned variational parameters
    a_q = pyro.param("alpha_q").item()
    b_q = pyro.param("beta_q").item()

    inferred_mean = a_q / (a_q + b_q)
    # compute inferred standard deviation
    factor = b_q / (a_q * (1.0 + a_q + b_q))
    inferred_std = inferred_mean * math.sqrt(factor)
    print("\nbased on the data and our prior belief, the fairness " +
          "of the coin is %.3f +- %.3f" % (inferred_mean, inferred_std))

    beta_posterior = torch.distributions.beta.Beta(a_q, b_q)
    posterior = torch.distributions.bernoulli.Bernoulli(
        beta_posterior.sample())
    logging.info("Sampling:{}".format(posterior.sample()))
Example #12
def main(args):
    logging.info('Generating data')
    # WL: commented. =====
    # pyro.set_rng_seed(0)
    # ====================
    pyro.clear_param_store()
    pyro.enable_validation(__debug__)

    # We can generate synthetic data directly by calling the model.
    true_topic_weights, true_topic_words, data = model(args=args)

    # We'll train using SVI.
    logging.info('-' * 40)
    logging.info('Training on {} documents'.format(args.num_docs))
    predictor = make_predictor(args)
    guide = functools.partial(parametrized_guide, predictor)
    Elbo = JitTraceEnum_ELBO if args.jit else TraceEnum_ELBO
    elbo = Elbo(max_plate_nesting=2)
    optim = ClippedAdam({'lr': args.learning_rate})
    svi = SVI(model, guide, optim, elbo)

    # WL: edited. =====
    # logging.info('Step\tLoss')
    logging.info(args)
    times = [time.time()]
    logging.info('\nstep\t' + 'epoch\t' + 'elbo\t' + 'time(sec)')
    # =================

    # WL: edited. =====
    # for step in range(args.num_steps):
    for step in range(1, args.num_steps+1):
    # =================
        loss = svi.step(data, args=args, batch_size=args.batch_size)

        # WL: edited. =====
        # if step % 10 == 0:
        #    logging.info('{: >5d}\t{}'.format(step, loss))
        if (step % 10 == 0) or (step == 1):
            times.append(time.time())
            logging.info(f'{step:06d}\t'
                         f'{(step * args.batch_size) / args.num_docs:.3f}\t'
                         f'{-loss:.4f}\t'
                         f'{times[-1]-times[-2]:.3f}')
Example #13
def train_model(data, n_steps, pmf):

    def model(data):
        mu = 2.8
        num_sigmas = 4
        sigma = 0.3
        low = mu - num_sigmas * sigma
        high = mu + num_sigmas * sigma

        f = pyro.sample("latent", dist.Uniform(low, high))
        print(f)
        # sample f from the prior
        # Probabilities are generated by the pmf
        for i in range(len(data)):
            pyro.sample("obs_{}".format(i), Poisson(f), obs=data[i])


    def guide(data):
        lam = pyro.param("lam", torch.tensor(2.0), constraint=constraints.positive)
        # alpha_q = pyro.param("alpha_q", torch.tensor(2.0))
        # beta_q = pyro.param("beta_q", torch.tensor(1.0))
        pyro.sample("latent", dist.Poisson(torch.tensor(2.0)))

    adam_params = {"lr": 0.0005, "betas": (0.90, 0.999)}
    optimizer = ClippedAdam(adam_params)

    svi = SVI(model, guide, optimizer, loss=Trace_ELBO())

    for step in range(n_steps):
        loss = svi.step(data)
        if step % 100 == 0:
            logging.info(".")
            logging.info("Elbo loss: {}".format(loss))

    # grab the learned variational parameters
    lam = pyro.param("lam").item()
    print(lam)
    # a_q = pyro.param("alpha_q").item()
    # b_q = pyro.param("beta_q").item()
    # print(a_q, b_q)
    posterior = Poisson(lam)
    logging.info("Sampling:{}".format(posterior.sample()))
Example #14
def test_intractable_smoke(backend):
    def model():
        i_plate = pyro.plate("i", 2, dim=-1)
        j_plate = pyro.plate("j", 3, dim=-2)
        with i_plate:
            a = pyro.sample("a", dist.Normal(0, 1))
        with j_plate:
            b = pyro.sample("b", dist.Normal(0, 1))
        with i_plate, j_plate:
            c = pyro.sample("c", dist.Normal(a + b, 1))
            pyro.sample("d", dist.Normal(c, 1), obs=torch.zeros(3, 2))

    guide = AutoGaussian(model, backend=backend)
    svi = SVI(model, guide, ClippedAdam({"lr": 1e-8}), Trace_ELBO())
    for step in range(2):
        with xfail_if_not_implemented():
            svi.step()
    guide()
    predictive = Predictive(model, guide=guide, num_samples=2)
    predictive()
Example #15
def run_inference(model, guide, home_id, away_id, score1, score2, args):
    gamma = 0.01  # final learning rate will be gamma * initial_lr
    lrd = gamma**(1 / args.num_iterations)

    svi = SVI(
        model=model,
        guide=guide,
        optim=ClippedAdam({
            "lr": args.learning_rate,
            "lrd": lrd
        }),
        loss=Trace_ELBO(num_particles=args.num_particles),
    )

    pyro.clear_param_store()  # clear global parameter cache
    pyro.set_rng_seed(args.rng_seed)

    advi_loss = []
    for j in range(args.num_iterations):
        # calculate the loss and take a gradient step
        loss = svi.step(
            home_id=home_id,
            away_id=away_id,
            score1_obs=score1,
            score2_obs=score2,
        )
        advi_loss.append(loss)
        if j % 100 == 0:
            print("[iteration %4d] loss: %.4f" % (j + 1, loss))

    print("Posterior: ")
    for i in pyro.get_param_store().items():
        print(i)

    fit = Predictive(model=model, guide=guide,
                     num_samples=2000)(home_id=home_id, away_id=away_id)

    return fit
Example #16
    def __init__(self, _c: "VAEConfig"):
        super().__init__()
        self._c = _c
        self.image_flatten_dim = _c.image_dim[0] * _c.image_dim[1]

        adam_params = {
            "lr": _c.init_lr,
            "betas": (0.96, 0.999),
            "clip_norm": 10.0,
            "lrd": 0.99996,
            "weight_decay": 2.0
        }
        self.optimizer = ClippedAdam(adam_params)

        self.emitter = Decoder(_c.z_dim,
                               _c.emitter_channel,
                               dropout_p=_c.dropout_rate)
        self.trans = GatedTransition(_c.z_dim, _c.transition_dim)
        self.combiner = Combiner(_c.z_dim, _c.rnn_dim)

        self.crnn = ConvRNN(_c.image_dim,
                            _c.rnn_dim,
                            _c.rnn_layers,
                            _c.dropout_rate,
                            use_lstm=_c.use_lstm,
                            channels=_c.crnn_channel)
        self.iafs = [
            affine_autoregressive(_c.z_dim, hidden_dims=[_c.iaf_dim])
            for _ in range(_c.num_iafs)
        ]
        self.iafs_modules = nn.ModuleList(self.iafs)

        self.z_0 = nn.Parameter(torch.zeros(_c.z_dim))
        self.z_q_0 = nn.Parameter(torch.zeros(_c.z_dim))
        self.h_0 = nn.Parameter(torch.zeros(1, 1, _c.rnn_dim))
        if _c.use_lstm:
            self.c_0 = nn.Parameter(torch.zeros(1, 1, _c.rnn_dim))
        self.cuda()
Example #17
    def run_svi(self, n_steps=100, num_particles=10, clear_params=False):
        if not clear_params:
            pyro.clear_param_store()
        opt = ClippedAdam(self.optimizer_params)
        svi = SVI(self.model,
                  self.guide,
                  opt,
                  loss=Trace_ELBO(num_particles=num_particles))
        loss = []
        pred_prob = []
        valid_prob = []
        for step in range(n_steps):
            curr_loss = svi.step(self.data)
            prob = self.calc_log_sum(self.data, num_particles)
            valid_p = self.calc_log_sum(self.valid_data, num_particles)
            loss.append(curr_loss)
            pred_prob.append(prob)
            valid_prob.append(valid_p)
            if step % (n_steps // 20) == 0:
                message = '{:.0%} ({:.1f}) ({:.1f}) ({:.1f})'.format(
                    step / n_steps, curr_loss, prob, valid_p)
                print(message, end=' | ')
        return loss, pred_prob, valid_prob
Example #18
def train(args, dataset):
    """
    Train a model and guide to fit a dataset.
    """
    counts = dataset["counts"]
    num_stations = len(dataset["stations"])
    logging.info(
        "Training on {} stations over {} hours, {} batches/epoch".format(
            num_stations, len(counts),
            int(math.ceil(len(counts) / args.batch_size))))
    time_features = make_time_features(args, 0, len(counts))
    control_features = (counts.max(1)[0] + counts.max(2)[0]).clamp(max=1)
    logging.info(
        "On average {:0.1f}/{} stations are open at any one time".format(
            control_features.sum(-1).mean(), num_stations))
    features = torch.cat([time_features, control_features], -1)
    feature_dim = features.size(-1)
    logging.info("feature_dim = {}".format(feature_dim))
    metadata = {"args": args, "losses": [], "control": control_features}
    torch.save(metadata, args.training_filename)

    def optim_config(module_name, param_name):
        config = {
            "lr": args.learning_rate,
            "betas": (0.8, 0.99),
            "weight_decay": 0.01**(1 / args.num_steps),
        }
        if param_name == "init_scale":
            config["lr"] *= 0.1  # init_dist sees much less data per minibatch
        return config

    training_counts = counts[:args.truncate] if args.truncate else counts
    data_size = len(training_counts)
    model = Model(args, features, training_counts)
    guide = Guide(args, features, training_counts)
    elbo = Trace_ELBO()
    optim = ClippedAdam(optim_config)
    svi = SVI(model, guide, optim, elbo)
    losses = []
    forecaster = None
    for step in range(args.num_steps):
        begin_time = torch.randint(max(1, data_size - args.batch_size),
                                   ()).item()
        end_time = min(data_size, begin_time + args.batch_size)
        feature_batch = features[begin_time:end_time]
        counts_batch = counts[begin_time:end_time]
        loss = svi.step(feature_batch, counts_batch) / counts_batch.numel()
        assert math.isfinite(loss), loss
        losses.append(loss)
        logging.debug("step {} loss = {:0.4g}".format(step, loss))

        if step % 20 == 0:
            # Save state every few steps.
            pyro.get_param_store().save(args.param_store_filename)
            metadata = {
                "args": args,
                "losses": losses,
                "control": control_features
            }
            torch.save(metadata, args.training_filename)
            forecaster = Forecaster(args, dataset, features, model, guide)
            torch.save(forecaster, args.forecaster_filename)

            if logging.Logger(None).isEnabledFor(logging.DEBUG):
                init_scale = pyro.param("init_scale").data
                trans_scale = pyro.param("trans_scale").data
                trans_matrix = pyro.param("trans_matrix").data
                eigs = trans_matrix.eig()[0].norm(dim=-1).sort(
                    descending=True).values
                logging.debug("guide.diag_part = {}".format(
                    guide.diag_part.data.squeeze()))
                logging.debug(
                    "init scale min/mean/max: {:0.3g} {:0.3g} {:0.3g}".format(
                        init_scale.min(), init_scale.mean(), init_scale.max()))
                logging.debug(
                    "trans scale min/mean/max: {:0.3g} {:0.3g} {:0.3g}".format(
                        trans_scale.min(), trans_scale.mean(),
                        trans_scale.max()))
                logging.debug("trans mat eig:\n{}".format(eigs))

    return forecaster
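
Note that ClippedAdam is constructed above from optim_config, a callable rather than a dict: Pyro optimizers also accept a per-parameter callable that returns the config dict, which is how "init_scale" gets a smaller learning rate here. Mirroring that same two-argument form in isolation (hypothetical settings):

from pyro.optim import ClippedAdam

def per_param_config(module_name, param_name):
    # smaller step size for the "init_scale" parameter only
    config = {"lr": 0.1, "betas": (0.8, 0.99)}
    if param_name == "init_scale":
        config["lr"] *= 0.1
    return config

optim = ClippedAdam(per_param_config)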
Example #19
def main(args):
    ## Do-Re-Mi (an ascending major scale)
    def easyTones():
        training_seq_lengths = torch.tensor([8]*1)
        training_data_sequences = torch.zeros(1,8,88)
        for i in range(1):
            training_data_sequences[i][0][int(70-i*10)  ] = 1
            training_data_sequences[i][1][int(70-i*10)+2] = 1
            training_data_sequences[i][2][int(70-i*10)+4] = 1
            training_data_sequences[i][3][int(70-i*10)+5] = 1
            training_data_sequences[i][4][int(70-i*10)+7] = 1
            training_data_sequences[i][5][int(70-i*10)+9] = 1
            training_data_sequences[i][6][int(70-i*10)+11] = 1
            training_data_sequences[i][7][int(70-i*10)+12] = 1
        return training_seq_lengths, training_data_sequences

    def superEasyTones():
        training_seq_lengths = torch.tensor([8]*10)
        training_data_sequences = torch.zeros(10,8,88)
        for i in range(10):
            for j in range(8):
                training_data_sequences[i][j][int(30+i*5)] = 1
        return training_seq_lengths, training_data_sequences

    ## Do-Do-Do, Do-Do-Do, Do-Do-Do (the same note repeated)
    def easiestTones():
        training_seq_lengths = torch.tensor([8]*10)
        training_data_sequences = torch.zeros(10,8,88)
        for i in range(10):
            for j in range(8):
                training_data_sequences[i][j][int(70)] = 1
        return training_seq_lengths, training_data_sequences

    # setup logging
    logging.basicConfig(level=logging.DEBUG, format='%(message)s', filename=args.log, filemode='w')
    console = logging.StreamHandler()
    console.setLevel(logging.INFO)
    logging.getLogger('').addHandler(console)
    logging.info(args)

    data = poly.load_data(poly.JSB_CHORALES)
    training_seq_lengths = data['train']['sequence_lengths']
    training_data_sequences = data['train']['sequences']
    training_seq_lengths, training_data_sequences = easiestTones()
    test_seq_lengths = data['test']['sequence_lengths']
    test_data_sequences = data['test']['sequences']
    test_seq_lengths, test_data_sequences = easiestTones()
    val_seq_lengths = data['valid']['sequence_lengths']
    val_data_sequences = data['valid']['sequences']
    val_seq_lengths, val_data_sequences = easiestTones()
    N_train_data = len(training_seq_lengths)
    N_train_time_slices = float(torch.sum(training_seq_lengths))
    N_mini_batches = int(N_train_data / args.mini_batch_size +
                         int(N_train_data % args.mini_batch_size > 0))

    logging.info("N_train_data: %d     avg. training seq. length: %.2f    N_mini_batches: %d" %
                 (N_train_data, training_seq_lengths.float().mean(), N_mini_batches))

    # how often we do validation/test evaluation during training
    val_test_frequency = 50
    # the number of samples we use to do the evaluation
    n_eval_samples = 1

    # package repeated copies of val/test data for faster evaluation
    # (i.e. set us up for vectorization)
    def rep(x):
        rep_shape = torch.Size([x.size(0) * n_eval_samples]) + x.size()[1:]
        repeat_dims = [1] * len(x.size())
        repeat_dims[0] = n_eval_samples
        return x.repeat(repeat_dims).reshape(n_eval_samples, -1).transpose(1, 0).reshape(rep_shape)

    # get the validation/test data ready for the dmm: pack into sequences, etc.
    val_seq_lengths = rep(val_seq_lengths)
    test_seq_lengths = rep(test_seq_lengths)
    val_batch, val_batch_reversed, val_batch_mask, val_seq_lengths = poly.get_mini_batch(
        torch.arange(n_eval_samples * val_data_sequences.shape[0]), rep(val_data_sequences),
        val_seq_lengths, cuda=args.cuda)
    test_batch, test_batch_reversed, test_batch_mask, test_seq_lengths = poly.get_mini_batch(
        torch.arange(n_eval_samples * test_data_sequences.shape[0]), rep(test_data_sequences),
        test_seq_lengths, cuda=args.cuda)

    # instantiate the dmm
    dmm = DMM(rnn_dropout_rate=args.rnn_dropout_rate, num_iafs=args.num_iafs,
              iaf_dim=args.iaf_dim, use_cuda=args.cuda)

    # setup optimizer
    adam_params = {"lr": args.learning_rate, "betas": (args.beta1, args.beta2),
                   "clip_norm": args.clip_norm, "lrd": args.lr_decay,
                   "weight_decay": args.weight_decay}
    adam = ClippedAdam(adam_params)

    # setup inference algorithm
    if args.tmc:
        if args.jit:
            raise NotImplementedError("no JIT support yet for TMC")
        tmc_loss = TraceTMC_ELBO()
        dmm_guide = config_enumerate(dmm.guide, default="parallel", num_samples=args.tmc_num_samples, expand=False)
        svi = SVI(dmm.model, dmm_guide, adam, loss=tmc_loss)
    elif args.tmcelbo:
        if args.jit:
            raise NotImplementedError("no JIT support yet for TMC ELBO")
        elbo = TraceEnum_ELBO()
        dmm_guide = config_enumerate(dmm.guide, default="parallel", num_samples=args.tmc_num_samples, expand=False)
        svi = SVI(dmm.model, dmm_guide, adam, loss=elbo)
    else:
        elbo = JitTrace_ELBO() if args.jit else Trace_ELBO()
        svi = SVI(dmm.model, dmm.guide, adam, loss=elbo)

    # now we're going to define some functions we need to form the main training loop

    # saves the model and optimizer states to disk
    def save_checkpoint():
        logging.info("saving model to %s..." % args.save_model)
        torch.save(dmm.state_dict(), args.save_model)
        # logging.info("saving optimizer states to %s..." % args.save_opt)
        # adam.save(args.save_opt)
        logging.info("done saving model and optimizer checkpoints to disk.")

    # loads the model and optimizer states from disk
    def load_checkpoint():
        assert exists(args.load_opt) and exists(args.load_model), \
            "--load-model and/or --load-opt misspecified"
        logging.info("loading model from %s..." % args.load_model)
        dmm.load_state_dict(torch.load(args.load_model))
        logging.info("loading optimizer states from %s..." % args.load_opt)
        adam.load(args.load_opt)
        logging.info("done loading model and optimizer states.")

    # prepare a mini-batch and take a gradient step to minimize -elbo
    def process_minibatch(epoch, which_mini_batch, shuffled_indices):
        if args.annealing_epochs > 0 and epoch < args.annealing_epochs:
            # compute the KL annealing factor appropriate for the current mini-batch in the current epoch
            min_af = args.minimum_annealing_factor
            annealing_factor = min_af + (1.0 - min_af) * \
                (float(which_mini_batch + epoch * N_mini_batches + 1) /
                 float(args.annealing_epochs * N_mini_batches))
        else:
            # by default the KL annealing factor is unity
            annealing_factor = 1.0
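        # Worked example with hypothetical settings: min_af=0.2, 2 annealing
        # epochs and 10 mini-batches per epoch give, for the 5th mini-batch of
        # epoch 0, annealing_factor = 0.2 + 0.8 * (5 / 20) = 0.4; the factor
        # reaches 1.0 at the end of epoch 1 and stays there afterwards.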

        # compute which sequences in the training set we should grab
        mini_batch_start = (which_mini_batch * args.mini_batch_size)
        mini_batch_end = np.min([(which_mini_batch + 1) * args.mini_batch_size, N_train_data])
        mini_batch_indices = shuffled_indices[mini_batch_start:mini_batch_end]
        # grab a fully prepped mini-batch using the helper function in the data loader
        mini_batch, mini_batch_reversed, mini_batch_mask, mini_batch_seq_lengths \
            = poly.get_mini_batch(mini_batch_indices, training_data_sequences,
                                  training_seq_lengths, cuda=args.cuda)
        # do an actual gradient step
        loss = svi.step(mini_batch, mini_batch_reversed, mini_batch_mask,
                        mini_batch_seq_lengths, annealing_factor)
        # keep track of the training loss
        return loss

    # helper function for doing evaluation
    def do_evaluation():
        # put the RNN into evaluation mode (i.e. turn off drop-out if applicable)
        dmm.rnn.eval()

        # compute the validation and test loss n_samples many times
        val_nll = svi.evaluate_loss(val_batch, val_batch_reversed, val_batch_mask,
                                    val_seq_lengths) / float(torch.sum(val_seq_lengths))
        test_nll = svi.evaluate_loss(test_batch, test_batch_reversed, test_batch_mask,
                                     test_seq_lengths) / float(torch.sum(test_seq_lengths))

        # put the RNN back into training mode (i.e. turn on drop-out if applicable)
        dmm.rnn.train()
        return val_nll, test_nll

    # if checkpoint files provided, load model and optimizer states from disk before we start training
    if args.load_opt != '' and args.load_model != '':
        load_checkpoint()

    #################
    # TRAINING LOOP #
    #################
    times = [time.time()]
    for epoch in range(args.num_epochs):
        # if specified, save model and optimizer states to disk every checkpoint_freq epochs
        if args.checkpoint_freq > 0 and epoch > 0 and epoch % args.checkpoint_freq == 0:
            save_checkpoint()

        # accumulator for our estimate of the negative log likelihood (or rather -elbo) for this epoch
        epoch_nll = 0.0
        # prepare mini-batch subsampling indices for this epoch
        shuffled_indices = torch.randperm(N_train_data)

        # process each mini-batch; this is where we take gradient steps
        for which_mini_batch in range(N_mini_batches):
            epoch_nll += process_minibatch(epoch, which_mini_batch, shuffled_indices)

        # report training diagnostics
        times.append(time.time())
        epoch_time = times[-1] - times[-2]
        logging.info("[training epoch %04d]  %.4f \t\t\t\t(dt = %.3f sec)" %
                     (epoch, epoch_nll / N_train_time_slices, epoch_time))

        # do evaluation on test and validation data and report results
        if val_test_frequency > 0 and epoch > 0 and epoch % val_test_frequency == 0:
            val_nll, test_nll = do_evaluation()
            logging.info("[val/test epoch %04d]  %.4f  %.4f" % (epoch, val_nll, test_nll))
Example #20
# Convert the data into tensors
X_train_torch = torch.tensor(X_train_scaled)
y_train_torch = torch.tensor(y_train_scaled)

pyro.clear_param_store()

# Provide a guide which fits a pre-defined distribution over each
# hidden parameter. The AutoDiagonalNormal guide fits a normal
# distribution over each coefficient and our rate parameter
my_guide = AutoDiagonalNormal(model_gamma)


# Initialize the SVI optimization class
my_svi = SVI(model=model_gamma,
             guide=my_guide,
             optim=ClippedAdam({"lr": 0.01, 'clip_norm': 1.0}),
             loss=Trace_ELBO())

losses = []

start_time = time.time()

# Perform optimization
for i in range(5000):

    loss = my_svi.step(X_train_torch,
                       y_train_torch,  
                       california.feature_names)
    
    normalized_loss = loss / X_train_torch.shape[0]
    losses.append(normalized_loss)  # track the per-datapoint loss
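
# After the loop, the trained AutoDiagonalNormal guide can summarize the
# posterior; a hypothetical follow-up using Pyro's AutoGuide.quantiles():
posterior_summary = my_guide.quantiles([0.05, 0.5, 0.95])
print(posterior_summary)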
    
Example #21
def main(args):
    # Init tensorboard
    writer = SummaryWriter('./runs/' + args.runname + str(args.trialnumber))
    model_name = 'VanillaDMM'

    # Set evaluation log file
    evaluation_logpath = './logs/{}/evaluation_result.log'.format(
        model_name.lower())
    log_evaluation(evaluation_logpath,
                   'Evaluation Trial - {}\n'.format(args.trialnumber))

    # Constants
    time_length = 30
    input_length_for_pred = 20
    pred_length = time_length - input_length_for_pred
    train_batch_size = 16
    valid_batch_size = 1

    # For model
    input_channels = 1
    z_channels = 50
    emission_channels = [64, 32]
    transition_channels = 64
    encoder_channels = [32, 64]
    rnn_input_dim = 256
    rnn_channels = 128
    kernel_size = 3
    pred_length = 0

    # Device checking
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda:0" if use_cuda else "cpu")

    # Make dataset
    logging.info("Generate data")
    train_datapath = args.datapath / 'train'
    valid_datapath = args.datapath / 'valid'
    train_dataset = DiffusionDataset(train_datapath)
    valid_dataset = DiffusionDataset(valid_datapath)

    # Create data loaders from pickle data
    logging.info("Generate data loaders")
    train_dataloader = DataLoader(
        train_dataset, batch_size=train_batch_size, shuffle=True, num_workers=8)
    valid_dataloader = DataLoader(
        valid_dataset, batch_size=valid_batch_size, num_workers=4)

    # Training parameters
    width = 100
    height = 100
    input_dim = width * height

    # Create model
    logging.warning("Generate model")
    logging.warning(input_dim)
    pred_input_dim = 10
    dmm = DMM(input_channels=input_channels, z_channels=z_channels, emission_channels=emission_channels,
              transition_channels=transition_channels, encoder_channels=encoder_channels, rnn_input_dim=rnn_input_dim, rnn_channels=rnn_channels, kernel_size=kernel_size, height=height, width=width, pred_input_dim=pred_input_dim, num_layers=1, rnn_dropout_rate=0.0,
              num_iafs=0, iaf_dim=50, use_cuda=use_cuda)

    # Initialize model
    logging.info("Initialize model")
    epochs = args.endepoch
    learning_rate = 0.0001
    beta1 = 0.9
    beta2 = 0.999
    clip_norm = 10.0
    lr_decay = 1.0
    weight_decay = 0
    adam_params = {"lr": learning_rate, "betas": (beta1, beta2),
                   "clip_norm": clip_norm, "lrd": lr_decay,
                   "weight_decay": weight_decay}
    adam = ClippedAdam(adam_params)
    elbo = Trace_ELBO()
    svi = SVI(dmm.model, dmm.guide, adam, loss=elbo)

    # saves the model and optimizer states to disk
    save_model = Path('./checkpoints/' + model_name)

    def save_checkpoint(epoch):
        save_dir = save_model / '{}.model'.format(epoch)
        save_opt_dir = save_model / '{}.opt'.format(epoch)
        logging.info("saving model to %s..." % save_dir)
        torch.save(dmm.state_dict(), save_dir)
        logging.info("saving optimizer states to %s..." % save_opt_dir)
        adam.save(save_opt_dir)
        logging.info("done saving model and optimizer checkpoints to disk.")

    # Starting epoch
    start_epoch = args.startepoch

    # loads the model and optimizer states from disk
    if start_epoch != 0:
        load_opt = './checkpoints/' + model_name + \
            '/e{}-i188-opt-tn{}.opt'.format(start_epoch - 1, args.trialnumber)
        load_model = './checkpoints/' + model_name + \
            '/e{}-i188-tn{}.pt'.format(start_epoch - 1, args.trialnumber)

        def load_checkpoint():
            # assert exists(load_opt) and exists(load_model), \
            #     "--load-model and/or --load-opt misspecified"
            logging.info("loading model from %s..." % load_model)
            dmm.load_state_dict(torch.load(load_model, map_location=device))
            # logging.info("loading optimizer states from %s..." % load_opt)
            # adam.load(load_opt)
            # logging.info("done loading model and optimizer states.")

        if load_model != '':
            logging.info('Load checkpoint')
            load_checkpoint()

    # Validation only?
    validation_only = args.validonly

    # Train the model
    if not validation_only:
        logging.info("Training model")
        annealing_epochs = 1000
        minimum_annealing_factor = 0.2
        N_train_size = 3000
        N_mini_batches = int(N_train_size / train_batch_size +
                             int(N_train_size % train_batch_size > 0))
        for epoch in tqdm(range(start_epoch, epochs), desc='Epoch', leave=True):
            r_loss_train = 0
            dmm.train(True)
            idx = 0
            mov_avg_loss = 0
            mov_data_len = 0
            for which_mini_batch, data in enumerate(tqdm(train_dataloader, desc='Train', leave=True)):
                if annealing_epochs > 0 and epoch < annealing_epochs:
                    # compute the KL annealing factor appropriate for the current mini-batch in the current epoch
                    min_af = minimum_annealing_factor
                    annealing_factor = min_af + (1.0 - min_af) * \
                        (float(which_mini_batch + epoch * N_mini_batches + 1) /
                         float(annealing_epochs * N_mini_batches))
                else:
                    # by default the KL annealing factor is unity
                    annealing_factor = 1.0

                data['observation'] = normalize(
                    data['observation'].unsqueeze(2).to(device))
                batch_size, length, _, w, h = data['observation'].shape
                data_reversed = reverse_sequences(data['observation'])
                data_mask = torch.ones(
                    batch_size, length, input_channels, w, h).cuda()

                loss = svi.step(data['observation'],
                                data_reversed, data_mask, annealing_factor)

                # Running losses
                mov_avg_loss += loss
                mov_data_len += batch_size

                r_loss_train += loss
                idx += 1

            # Average losses
            train_loss_avg = r_loss_train / (len(train_dataset) * time_length)
            writer.add_scalar('Loss/train', train_loss_avg, epoch)
            logging.info("Epoch: %d, Training loss: %1.5f",
                         epoch, train_loss_avg)

            # # Time to time evaluation
            if epoch == epochs - 1:
                for temp_pred_length in [20]:
                    r_loss_valid = 0
                    r_loss_loc_valid = 0
                    r_loss_scale_valid = 0
                    r_loss_latent_valid = 0
                    dmm.train(False)
                    val_pred_length = temp_pred_length
                    val_pred_input_length = 10
                    with torch.no_grad():
                        for i, data in enumerate(tqdm(valid_dataloader, desc='Eval', leave=True)):
                            data['observation'] = normalize(
                                data['observation'].unsqueeze(2).to(device))
                            batch_size, length, _, w, h = data['observation'].shape
                            data_reversed = reverse_sequences(
                                data['observation'])
                            data_mask = torch.ones(
                                batch_size, length, input_channels, w, h).cuda()

                            pred_tensor = data['observation'][:,
                                                              :input_length_for_pred, :, :, :]
                            pred_tensor_reversed = reverse_sequences(
                                pred_tensor)
                            pred_tensor_mask = torch.ones(
                                batch_size, input_length_for_pred, input_channels, w, h).cuda()

                            ground_truth = data['observation'][:,
                                                               input_length_for_pred:, :, :, :]

                            val_nll = svi.evaluate_loss(
                                data['observation'], data_reversed, data_mask)

                            preds, _, loss_loc, loss_scale = do_prediction_rep_inference(
                                dmm, pred_tensor_mask, val_pred_length, val_pred_input_length, data['observation'])

                            ground_truth = denormalize(
                                data['observation'].squeeze().cpu().detach()
                            )
                            pred_with_input = denormalize(
                                torch.cat(
                                    [data['observation'][:, :-val_pred_length, :, :, :].squeeze(),
                                     preds.squeeze()], dim=0
                                ).cpu().detach()
                            )

                            # Running losses
                            r_loss_valid += val_nll
                            r_loss_loc_valid += loss_loc
                            r_loss_scale_valid += loss_scale

                    # Average losses
                    valid_loss_avg = r_loss_valid / \
                        (len(valid_dataset) * time_length)
                    valid_loss_loc_avg = r_loss_loc_valid / \
                        (len(valid_dataset) * val_pred_length * width * height)
                    valid_loss_scale_avg = r_loss_scale_valid / \
                        (len(valid_dataset) * val_pred_length * width * height)
                    writer.add_scalar('Loss/test', valid_loss_avg, epoch)
                    writer.add_scalar(
                        'Loss/test_obs', valid_loss_loc_avg, epoch)
                    writer.add_scalar('Loss/test_scale',
                                      valid_loss_scale_avg, epoch)
                    logging.info("Validation loss: %1.5f", valid_loss_avg)
                    logging.info("Validation obs loss: %1.5f",
                                 valid_loss_loc_avg)
                    logging.info("Validation scale loss: %1.5f",
                                 valid_loss_scale_avg)
                    log_evaluation(evaluation_logpath, "Validation obs loss for {}s pred {}: {}\n".format(
                        val_pred_length, args.trialnumber, valid_loss_loc_avg))
                    log_evaluation(evaluation_logpath, "Validation scale loss for {}s pred {}: {}\n".format(
                        val_pred_length, args.trialnumber, valid_loss_scale_avg))

            # Save model
            if epoch % 50 == 0 or epoch == epochs - 1:
                torch.save(dmm.state_dict(), args.modelsavepath / model_name /
                           'e{}-i{}-tn{}.pt'.format(epoch, idx, args.trialnumber))
                adam.save(args.modelsavepath / model_name /
                          'e{}-i{}-opt-tn{}.opt'.format(epoch, idx, args.trialnumber))

    # Last validation after training
    test_samples_indices = range(100)
    total_n = 0
    if validation_only:
        r_loss_loc_valid = 0
        r_loss_scale_valid = 0
        r_loss_latent_valid = 0
        dmm.train(False)
        val_pred_length = args.validpredlength
        val_pred_input_length = 10
        with torch.no_grad():
            for i in tqdm(test_samples_indices, desc='Valid', leave=True):
                # Data processing
                data = valid_dataset[i]
                if torch.isnan(torch.sum(data['observation'])):
                    print("Skip {}".format(i))
                    continue
                else:
                    total_n += 1
                data['observation'] = normalize(
                    data['observation'].unsqueeze(0).unsqueeze(2).to(device))
                batch_size, length, _, w, h = data['observation'].shape
                data_reversed = reverse_sequences(data['observation'])
                data_mask = torch.ones(
                    batch_size, length, input_channels, w, h).to(device)

                # Prediction
                pred_tensor_mask = torch.ones(
                    batch_size, input_length_for_pred, input_channels, w, h).to(device)
                preds, _, loss_loc, loss_scale = do_prediction_rep_inference(
                    dmm, pred_tensor_mask, val_pred_length, val_pred_input_length, data['observation'])

                ground_truth = denormalize(
                    data['observation'].squeeze().cpu().detach()
                )
                pred_with_input = denormalize(
                    torch.cat(
                        [data['observation'][:, :-val_pred_length, :, :, :].squeeze(),
                         preds.squeeze()], dim=0
                    ).cpu().detach()
                )

                # Save samples
                if i < 5:
                    save_dir_samples = Path('./samples/more_variance_long')
                    with open(save_dir_samples / '{}-gt-test.pkl'.format(i), 'wb') as fout:
                        pickle.dump(ground_truth, fout)
                    with open(save_dir_samples / '{}-vanilladmm-pred-test.pkl'.format(i), 'wb') as fout:
                        pickle.dump(pred_with_input, fout)

                # Running losses
                r_loss_loc_valid += loss_loc
                r_loss_scale_valid += loss_scale
                r_loss_latent_valid += np.sum((preds.squeeze().detach().cpu().numpy(
                ) - data['latent'][time_length - val_pred_length:, :, :].detach().cpu().numpy()) ** 2)

        # Average losses
        test_samples_indices = range(total_n)
        print(total_n)
        valid_loss_loc_avg = r_loss_loc_valid / \
            (total_n * val_pred_length * width * height)
        valid_loss_scale_avg = r_loss_scale_valid / \
            (total_n * val_pred_length * width * height)
        valid_loss_latent_avg = r_loss_latent_valid / \
            (total_n * val_pred_length * width * height)
        logging.info("Validation obs loss for %ds pred VanillaDMM: %f",
                     val_pred_length, valid_loss_loc_avg)
        logging.info("Validation latent loss: %f", valid_loss_latent_avg)

        with open('VanillaDMMResult.log', 'a+') as fout:
            validation_log = 'Pred {}s VanillaDMM: {}\n'.format(
                val_pred_length, valid_loss_loc_avg)
            fout.write(validation_log)
Example #22
def main(args):
    """
    run inference for CVAE
    :param args: arguments for CVAE
    :return: None
    """
    if args.seed is not None:
        set_seed(args.seed, args.cuda)

    if os.path.exists('cvae.model.pt'):
        print('Loading model %s' % 'cvae.model.pt')
        cvae = torch.load('cvae.model.pt')

    else:

        cvae = CVAE(z_dim=args.z_dim,
                    y_dim=8,
                    x_dim=32612,
                    hidden_dim=args.hidden_dimension,
                    use_cuda=args.cuda)

    print(cvae)

    # setup the optimizer
    adam_params = {
        "lr": args.learning_rate,
        "betas": (args.beta_1, 0.999),
        "clip_norm": 0.5
    }
    optimizer = ClippedAdam(adam_params)
    guide = config_enumerate(cvae.guide, args.enum_discrete)

    # set up the loss for inference.
    loss = SVI(cvae.model,
               guide,
               optimizer,
               loss=TraceEnum_ELBO(max_iarange_nesting=1))

    try:
        # setup the logger if a filename is provided
        logger = open(args.logfile, "w") if args.logfile else None

        data_loaders = setup_data_loaders(NHANES, args.cuda, args.batch_size)
        print(len(data_loaders['train']))
        print(len(data_loaders['test']))
        print(len(data_loaders['valid']))

        # initializing local variables to maintain the best validation acc
        # seen across epochs over the supervised training set
        # and the corresponding testing set and the state of the networks
        best_valid_err, best_test_err = float('inf'), float('inf')

        # run inference for a certain number of epochs
        for i in range(0, args.num_epochs):

            # get the losses for an epoch
            epoch_losses = \
                run_inference_for_epoch(args.batch_size, data_loaders, loss, args.cuda)

            # compute average epoch losses i.e. losses per example
            avg_epoch_losses = epoch_losses / NHANES.train_size

            # store the losses in the logfile
            str_loss = str(avg_epoch_losses)

            str_print = "{} epoch: avg loss {}".format(i,
                                                       "{}".format(str_loss))

            validation_err = get_accuracy(data_loaders["valid"],
                                          cvae.sim_measurements)
            str_print += " validation error {}".format(validation_err)

            # this test accuracy is only for logging, this is not used
            # to make any decisions during training
            test_err = get_accuracy(data_loaders["test"],
                                    cvae.sim_measurements)
            str_print += " test error {}".format(test_err)

            # update the best validation accuracy and the corresponding
            # testing accuracy and the state of the parent module (including the networks)
            if best_valid_err > validation_err:
                best_valid_err = validation_err
            if best_test_err > test_err:
                best_test_err = test_err

            print_and_log(logger, str_print)

        final_test_accuracy = get_accuracy(data_loaders["test"],
                                           cvae.sim_measurements)

        print_and_log(
            logger, "best validation error {} corresponding testing error {} "
            "last testing error {}".format(best_valid_err, best_test_err,
                                           final_test_accuracy))
        torch.save(cvae, 'cvae.model.pt')

        #mu, sigma, actuals, lods, masks = get_predictions(data_loaders["prediction"], cvae.sim_measurements)

        #torch.save((mu, sigma, actuals, lods, masks), 'cvae.predictions.pt')

    finally:
        # close the logger file object if we opened it earlier
        if args.logfile:
            logger.close()
Example #23
def main_test_mnist():
    from torchvision.datasets import MNIST
    from torchvision.transforms import Compose, ToTensor, ToPILImage, Normalize
    transform = Compose([ToTensor()])
    train_dataset = MNIST(root="/tmp",
                          train=True,
                          download=True,
                          transform=transform)
    test_dataset = MNIST(root="/tmp",
                         train=False,
                         download=True,
                         transform=transform)
    vae = VAE(x_dim=784,
              z_dim=50,
              device='cuda' if torch.cuda.is_available() else 'cpu')
    logger.info(f"\n{vae}")
    optimizer = ClippedAdam({"lr": 1e-3})
    svi = SVI(vae.model, vae.guide, optimizer, loss=Trace_ELBO())

    def _update(engine, batch):
        vae.train()
        x, y = batch
        loss = svi.step(x.view(-1, 784).to(vae.device, non_blocking=True))
        return loss / len(x), len(x)

    def _evaluate(engine, batch):
        vae.eval()
        x, y = batch
        elbo = svi.evaluate_loss(
            x.view(-1, 784).to(vae.device, non_blocking=True))
        return elbo / len(x), len(x)

    trainer = Engine(_update)
    evaluater = Engine(_evaluate)
    train_dataloader = DataLoader(train_dataset,
                                  batch_size=256,
                                  shuffle=True,
                                  pin_memory=True,
                                  drop_last=True,
                                  num_workers=8)
    test_dataloader = DataLoader(test_dataset,
                                 batch_size=256,
                                 shuffle=True,
                                 pin_memory=True,
                                 drop_last=True,
                                 num_workers=8)
    timer = Timer(average=True)
    timer.attach(engine=trainer,
                 start=Events.EPOCH_STARTED,
                 pause=Events.ITERATION_COMPLETED,
                 resume=Events.ITERATION_STARTED,
                 step=Events.ITERATION_COMPLETED)
    loss_metric = RunningAverage(output_transform=lambda outputs: -outputs[0],
                                 alpha=1)
    loss_metric.attach(engine=trainer, name="ELBO")
    loss_metric.attach(engine=evaluater, name="ELBO")
    vis = Visdom(server="gpu1.cluster.peidan.me",
                 port=10697,
                 env='Imp-pyro--vae-MNIST')

    @trainer.on(Events.EPOCH_COMPLETED)
    def log_train_loss(engine):
        elbo = engine.state.metrics['ELBO']
        logger.info(
            f"epoch:{engine.state.epoch}, ELBO: {elbo:.2f}, step time: {timer.value():.3f}s"
        )
        vis.line(Y=[elbo],
                 X=[engine.state.epoch],
                 win="Train-ELBO",
                 update='append',
                 opts={"title": "Train-ELBO"})

    def plot_vae_samples(title):
        x = torch.zeros([1, 784]).to(vae.device)
        for i in range(10):
            images = []
            for rr in range(100):
                # get loc from the model
                sample_loc_i = vae.model(x)
                img = sample_loc_i[0].view(1, 28, 28).cpu().data.numpy()
                images.append(img)
            vis.images(images, 10, 2, win=title, opts={'title': title})

    @trainer.on(Events.EPOCH_COMPLETED)
    def generate_samples(engine):
        epoch = engine.state.epoch
        if epoch % 10 == 0:
            logger.info(f"epoch: {epoch}, plot samples")
            plot_vae_samples(f"epoch-{epoch}")

    @trainer.on(Events.EPOCH_COMPLETED)
    def validation(engine):
        epoch = engine.state.epoch
        if epoch % 5 == 0:
            evaluater.run(test_dataloader)
            elbo = evaluater.state.metrics['ELBO']
            logger.info(f"epoch: {epoch}, validation ELBO: {elbo}")
            vis.line(Y=[elbo],
                     X=[engine.state.epoch],
                     win="Validation-ELBO",
                     update='append',
                     opts={'title': "Validation-ELBO"})

    trainer.run(train_dataloader, max_epochs=2500)
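
The Ignite wiring above can be hard to follow in isolation; the sketch below is a tiny, self-contained illustration of the same pattern (a trivial stand-in update function instead of svi.step, pytorch-ignite assumed installed): the process function returns (loss_per_example, batch_size) and RunningAverage negates the first element so the logged metric reads as an ELBO.

from ignite.engine import Engine, Events
from ignite.metrics import RunningAverage

def _update(engine, batch):
    loss = float(sum(batch)) / len(batch)  # stand-in for svi.step(...) / len(x)
    return loss, len(batch)

trainer = Engine(_update)
RunningAverage(output_transform=lambda out: -out[0]).attach(trainer, "ELBO")

@trainer.on(Events.EPOCH_COMPLETED)
def log_elbo(engine):
    print(engine.state.epoch, engine.state.metrics["ELBO"])

trainer.run([[1.0, 2.0], [3.0, 4.0]], max_epochs=3)
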
Exemple #24
0
              iaf_dim=50,
              use_cuda=True)

    learning_rate = 0.01
    beta1 = 0.9
    beta2 = 0.999
    clip_norm = 10.0
    lr_decay = 1.0
    weight_decay = 0
    adam_params = {
        "lr": learning_rate,
        "betas": (beta1, beta2),
        "clip_norm": clip_norm,
        "lrd": lr_decay,
        "weight_decay": weight_decay
    }
    adam = ClippedAdam(adam_params)

    elbo = Trace_ELBO()
    svi = SVI(dmm.model, dmm.guide, adam, loss=elbo)
    for i in range(100):
        loss = svi.step(input_tensor, input_tensor_reversed, input_tensor_mask)
        val_nll = svi.evaluate_loss(input_tensor, input_tensor_reversed,
                                    input_tensor_mask)
        print(val_nll)
        _, _, loss_loc, loss_scale = do_prediction(dmm, pred_tensor,
                                                   pred_tensor_reversed,
                                                   pred_tensor_mask, 5,
                                                   ground_truth)
        print(loss_loc, loss_scale)
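
The loop above interleaves svi.step (which updates the variational parameters) with svi.evaluate_loss (which only estimates the loss). Below is a minimal, runnable toy sketch of that split on a simple Gaussian-mean model rather than the DMM; it assumes a recent Pyro release and is only an illustration of the API.

import torch
from torch.distributions import constraints
import pyro
import pyro.distributions as dist
from pyro.infer import SVI, Trace_ELBO
from pyro.optim import ClippedAdam

def model(data):
    mu = pyro.sample("mu", dist.Normal(0.0, 10.0))
    with pyro.plate("data", len(data)):
        pyro.sample("obs", dist.Normal(mu, 1.0), obs=data)

def guide(data):
    loc = pyro.param("loc", torch.tensor(0.0))
    scale = pyro.param("scale", torch.tensor(1.0), constraint=constraints.positive)
    pyro.sample("mu", dist.Normal(loc, scale))

pyro.clear_param_store()
train, val = torch.randn(200) + 3.0, torch.randn(50) + 3.0
svi = SVI(model, guide, ClippedAdam({"lr": 0.01}), loss=Trace_ELBO())
for step in range(100):
    train_loss = svi.step(train)       # gradient step on the variational parameters
    val_loss = svi.evaluate_loss(val)  # ELBO estimate only, no parameter update
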
def main(args):
    # setup logging
    log = get_logger(args.log)
    log(args)

    data = poly.load_data(poly.JSB_CHORALES)
    training_seq_lengths = data['train']['sequence_lengths']
    training_data_sequences = data['train']['sequences']
    test_seq_lengths = data['test']['sequence_lengths']
    test_data_sequences = data['test']['sequences']
    val_seq_lengths = data['valid']['sequence_lengths']
    val_data_sequences = data['valid']['sequences']
    #=== wy: to force the batch size to be args.mini_batch_size (20) during training, testing, and validation.
    if debug:
        print("===== after load_data =====")
        print("training_data_sequences:\t shape={}".format(
            training_data_sequences.size()))
        print("test_data_sequences:\t shape={}".format(
            test_data_sequences.size()))
        print("val_data_sequences:\t shape={}".format(
            val_data_sequences.size()))
        print("===== after load_data =====")

    d1_training = int(len(training_seq_lengths) /
                      args.mini_batch_size) * args.mini_batch_size
    d1_test = args.mini_batch_size
    d1_val = args.mini_batch_size
    training_seq_lengths = training_seq_lengths[:d1_training]
    training_data_sequences = training_data_sequences[:d1_training]
    test_seq_lengths = test_seq_lengths[:d1_test]
    test_data_sequences = test_data_sequences[:d1_test]
    val_seq_lengths = val_seq_lengths[:d1_val]
    val_data_sequences = val_data_sequences[:d1_val]
    #=== wy
    N_train_data = len(training_seq_lengths)
    N_train_time_slices = float(torch.sum(training_seq_lengths))
    N_mini_batches = int(N_train_data / args.mini_batch_size +
                         int(N_train_data % args.mini_batch_size > 0))

    log("N_train_data: %d     avg. training seq. length: %.2f    N_mini_batches: %d"
        % (N_train_data, training_seq_lengths.float().mean(), N_mini_batches))

    # how often we do validation/test evaluation during training
    #=== wy
    # val_test_frequency = 50
    # WL: edited. =====
    # val_test_frequency = 1
    val_test_frequency = 0
    # =================
    #=== wy
    # the number of samples we use to do the evaluation
    n_eval_samples = 1

    # package repeated copies of val/test data for faster evaluation
    # (i.e. set us up for vectorization)
    def rep(x):
        rep_shape = torch.Size([x.size(0) * n_eval_samples]) + x.size()[1:]
        repeat_dims = [1] * len(x.size())
        repeat_dims[0] = n_eval_samples
        return x.repeat(repeat_dims).reshape(n_eval_samples, -1).transpose(
            1, 0).reshape(rep_shape)

    # get the validation/test data ready for the dmm: pack into sequences, etc.
    val_seq_lengths = rep(val_seq_lengths)
    test_seq_lengths = rep(test_seq_lengths)
    val_batch, val_batch_reversed, val_batch_mask, val_seq_lengths = poly.get_mini_batch(
        torch.arange(n_eval_samples * val_data_sequences.shape[0]),
        rep(val_data_sequences),
        val_seq_lengths,
        cuda=args.cuda)
    test_batch, test_batch_reversed, test_batch_mask, test_seq_lengths = poly.get_mini_batch(
        torch.arange(n_eval_samples * test_data_sequences.shape[0]),
        rep(test_data_sequences),
        test_seq_lengths,
        cuda=args.cuda)

    # instantiate the dmm
    dmm = DMM(rnn_dropout_rate=args.rnn_dropout_rate,
              num_iafs=args.num_iafs,
              iaf_dim=args.iaf_dim,
              use_cuda=args.cuda)

    # setup optimizer
    adam_params = {
        "lr": args.learning_rate,
        "betas": (args.beta1, args.beta2),
        "clip_norm": args.clip_norm,
        "lrd": args.lr_decay,
        "weight_decay": args.weight_decay
    }
    adam = ClippedAdam(adam_params)

    # setup inference algorithm
    elbo = JitTrace_ELBO() if args.jit else Trace_ELBO()
    svi = SVI(dmm.model, dmm.guide, adam, loss=elbo)

    # now we're going to define some functions we need to form the main training loop

    # saves the model and optimizer states to disk
    def save_checkpoint():
        log("saving model to %s..." % args.save_model)
        torch.save(dmm.state_dict(), args.save_model)
        log("saving optimizer states to %s..." % args.save_opt)
        adam.save(args.save_opt)
        log("done saving model and optimizer checkpoints to disk.")

    # loads the model and optimizer states from disk
    def load_checkpoint():
        assert exists(args.load_opt) and exists(args.load_model), \
            "--load-model and/or --load-opt misspecified"
        log("loading model from %s..." % args.load_model)
        dmm.load_state_dict(torch.load(args.load_model))
        log("loading optimizer states from %s..." % args.load_opt)
        adam.load(args.load_opt)
        log("done loading model and optimizer states.")

    # prepare a mini-batch and take a gradient step to minimize -elbo
    def process_minibatch(epoch, which_mini_batch, shuffled_indices):
        if args.annealing_epochs > 0 and epoch < args.annealing_epochs:
            # compute the KL annealing factor appropriate for the current mini-batch in the current epoch
            min_af = args.minimum_annealing_factor
            annealing_factor = min_af + (1.0 - min_af) * \
                (float(which_mini_batch + epoch * N_mini_batches + 1) /
                 float(args.annealing_epochs * N_mini_batches))
        else:
            # by default the KL annealing factor is unity
            annealing_factor = 1.0

        # compute which sequences in the training set we should grab
        mini_batch_start = (which_mini_batch * args.mini_batch_size)
        mini_batch_end = np.min([(which_mini_batch + 1) * args.mini_batch_size,
                                 N_train_data])
        mini_batch_indices = shuffled_indices[mini_batch_start:mini_batch_end]
        # grab a fully prepped mini-batch using the helper function in the data loader
        if debug: print("===== process_minibatch:S =====")
        mini_batch, mini_batch_reversed, mini_batch_mask, mini_batch_seq_lengths \
            = poly.get_mini_batch(mini_batch_indices, training_data_sequences,
                                  training_seq_lengths, cuda=args.cuda)
        if debug: print("===== process_minibatch:E =====")
        # do an actual gradient step
        loss = svi.step(mini_batch, mini_batch_reversed, mini_batch_mask,
                        mini_batch_seq_lengths, annealing_factor)
        # keep track of the training loss
        return loss

    # helper function for doing evaluation
    def do_evaluation():
        # put the RNN into evaluation mode (i.e. turn off drop-out if applicable)
        dmm.rnn.eval()

        # compute the validation and test loss n_samples many times
        val_nll = svi.evaluate_loss(
            val_batch, val_batch_reversed, val_batch_mask,
            val_seq_lengths) / torch.sum(val_seq_lengths)
        test_nll = svi.evaluate_loss(
            test_batch, test_batch_reversed, test_batch_mask,
            test_seq_lengths) / torch.sum(test_seq_lengths)

        # put the RNN back into training mode (i.e. turn on drop-out if applicable)
        dmm.rnn.train()
        return val_nll, test_nll

    # if checkpoint files provided, load model and optimizer states from disk before we start training
    if args.load_opt != '' and args.load_model != '':
        load_checkpoint()

    #################
    # TRAINING LOOP #
    #################
    times = [time.time()]
    # WL: added. =====
    log("\nepoch\t" + "elbo\t" + "time(sec)")
    # ================
    for epoch in range(args.num_epochs):
        # if specified, save model and optimizer states to disk every checkpoint_freq epochs
        if args.checkpoint_freq > 0 and epoch > 0 and epoch % args.checkpoint_freq == 0:
            save_checkpoint()

        # accumulator for our estimate of the negative log likelihood (or rather -elbo) for this epoch
        epoch_nll = 0.0
        # prepare mini-batch subsampling indices for this epoch
        shuffled_indices = torch.randperm(N_train_data)

        # process each mini-batch; this is where we take gradient steps
        for which_mini_batch in range(N_mini_batches):
            epoch_nll += process_minibatch(epoch, which_mini_batch,
                                           shuffled_indices)

        # report training diagnostics
        times.append(time.time())
        epoch_time = times[-1] - times[-2]
        # WL: edited. =====
        # log("[training epoch %04d]  %.4f \t\t\t\t(dt = %.3f sec)" %
        #     (epoch, epoch_nll / N_train_time_slices, epoch_time))
        log(f"{epoch:06d}\t"
            f"{-epoch_nll / N_train_time_slices:.4f}\t"
            f"{epoch_time:.3f}")
        # =================

        # do evaluation on test and validation data and report results
        if val_test_frequency > 0 and epoch > 0 and epoch % val_test_frequency == 0:
            val_nll, test_nll = do_evaluation()
            log("[val/test epoch %04d]  %.4f  %.4f" %
                (epoch, val_nll, test_nll))
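
The KL-annealing factor computed inside process_minibatch follows a simple linear ramp; the standalone helper below (an illustration, not code from the example) reproduces the same schedule, rising from the minimum annealing factor to 1.0 over annealing_epochs * N_mini_batches gradient steps and staying at 1.0 afterwards.

def kl_annealing_factor(step, annealing_epochs, n_mini_batches, min_af=0.2):
    # step = which_mini_batch + epoch * n_mini_batches, as in process_minibatch
    total_steps = annealing_epochs * n_mini_batches
    if step >= total_steps:
        return 1.0
    return min_af + (1.0 - min_af) * float(step + 1) / float(total_steps)

# e.g. 2 annealing epochs of 5 mini-batches each:
# [round(kl_annealing_factor(s, 2, 5), 2) for s in range(12)]
# -> [0.28, 0.36, 0.44, 0.52, 0.6, 0.68, 0.76, 0.84, 0.92, 1.0, 1.0, 1.0]
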
    def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]:
        if self._training_inputs is None:
            raise Exception('Cannot fit when no training data is present.')

        if self._fitted:
            return base.CallResult(None)

        # Extract curated data into X and Y's
        X_train = self._training_inputs[['unique_id', 'ds']]
        X_train['x'] = '1'
        y_train = self._training_inputs[['unique_id', 'ds', 'y']]
        y_train['y'] += self._constant # To remove missing values

        if timeout is None:
            timeout = np.inf
        if iterations is None:
            _iterations = self._num_iterations
        else:
            _iterations = iterations

        # Dataloader
        training_set = Dataset(config=self.hyperparams, X=X_train, y=y_train, min_series_length=self.hyperparams['seq_length'])
        # If the length is less than hyperparams, defaults to the minimum in dataset
        self._seq_length = min(self.hyperparams['seq_length'], training_set.min_series_length)

        # Dataset Parameters
        train_params = {'batch_size': self._batch_size,
                        'shuffle': True}

        # Data Generators
        training_generator = data.DataLoader(training_set, **train_params)

        # Setup Model
        self._obs_dim   = training_set.n_series
        self._net       = self._create_dmm()
        adam            = ClippedAdam(self._adam_params)
        self._optimizer = SVI(self._net.model, self._net.guide, adam, "ELBO")

        # Train functions
        self._iterations_done = 0
        self._has_finished    = False

        # Set model to training
        self._net.train()

        for iters in range(_iterations):
            epoch_nll = 0.0
            iteration_count = 0
            for local_batch, local_labels in training_generator:
                _local_training_batch = torch.cat((local_batch, local_labels), axis=2)
                mini_batch, mini_batch_reversed = self._reverse_sequences(mini_batch=_local_training_batch)
                # Loss for minibatch and minibatch reversed
                loss = self._optimizer.step(mini_batch, mini_batch_reversed)
                iteration_count += 1
                epoch_nll += loss
                # Break
                if np.isnan(loss):
                    print("minibatch nan-ed out!")
                    break
                if np.isinf(loss):
                    print("minibatch inf-ed out!")
                    break

            print("[training epoch %04d]  %.4f " % (iters, epoch_nll / iteration_count))

        self._fitted = True

        return CallResult(None)
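
_reverse_sequences is not shown in this snippet; a plausible minimal version for a dense [batch, time, features] tensor with no padding is sketched below. This is an assumption about its behaviour (the real helper may also handle variable-length masking): the guide's RNN is typically fed the observations in reverse time order.

import torch

def reverse_sequences(mini_batch):
    # flip along the time dimension so the RNN reads x_T, ..., x_1
    return mini_batch, torch.flip(mini_batch, dims=[1])

batch = torch.arange(6, dtype=torch.float32).reshape(2, 3, 1)
_, reversed_batch = reverse_sequences(batch)
print(reversed_batch[0].squeeze(-1))  # tensor([2., 1., 0.])
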
def main(args):
    pyro.enable_validation(is_validate=True)
    dataset = BitextDataSet(args.L1, args.L2, sentence_limit=5)
    dataloader = DataLoader(dataset, batch_size=64, collate_fn=custom_collator, shuffle=True)
    #pyro fun, all of it is still broken :P 
    #vnmt = VNMT(dataset.getSrcWord2Index(), dataset.getTgtWord2Index(),use_cuda=True)
    #pyroseq2seq = Seq2Seq(dataset.getSrcWord2Index(), dataset.getTgtWord2Index(), use_cuda=True)
    #rnnsearch = RNNSearch(dataset.getSrcWord2Index(), dataset.getTgtWord2Index(), use_cuda=True)

    #classical approach
    #seq2seq = Seq2Seq(dataset.getSrcWord2Index(), dataset.getTgtWord2Index(), use_cuda=True)
    model = make_model(dataset.getSrcWord2Index(), dataset.getTgtWord2Index())

    #print(vnmt)
    #vnmt = vnmt.cuda()
    # setup optimizer
    adam_params = {"lr": 0.003, #"betas": (args.beta1, args.beta2),
                   "clip_norm": 20.0, "lrd": .99996,
                   "weight_decay": 0.0}
    adam = ClippedAdam(adam_params)
    # setup inference algorithm
    elbo = JitTrace_ELBO() if False else Trace_ELBO()
    pad_indx = model.trg_embed.getWord2Index('<PAD>')
    optim = torch.optim.Adam(model.parameters(), lr=1e-3)
    loss_compute = SimpleLossCompute(model.generator, nn.NLLLoss(reduction="sum",ignore_index=pad_indx), opt=optim) 
    #svi = SVI(vnmt.model, vnmt.guide, adam, loss=elbo)
    #svi = SVI(seq2seq.model, seq2seq.guide, adam, loss=elbo)
    #svi = SVI(rnnsearch.model, rnnsearch.guide, adam, loss=elbo)

    #Classical pytorch approach 

    epochs = 2 
    print(len(dataloader))
    print(len(dataset))
    #seq2seq.train()
    for e in range(0, epochs):
        tot_loss = 0.0
        run_epoch(dataloader, model, loss_compute)
        #for i, b in enumerate(dataloader):
            #print(b)
            #l = svi.step(b[0], b[1])
            #optim.zero_grad()
            #loss = seq2seq( b[0], b[1]) 
            #loss.backward()
            #optim.step()
            #tot_loss += loss.item() * len(b[0])
            
            #print('batch {}, loss {}'.format(i, l))
            #loss += l
        print(tot_loss / len(dataset))
    
    to_translate = [dataset[i][0] for i in range(2) ]
    print('Original Sentences')
    for sent in to_translate:
        print(sent)
    print('Greedy Decoding Translation')
    greedy_trans = GreedyDecodingTranslation(model, to_translate)
    for sent in greedy_trans:
        print(' '.join([model.y_embed.getIndex2Word(s) for s in sent]))
    print('Simple Greedy Decoding Translation')
    greedy_trans = SimpleGreedyDecodingTranslation(model, to_translate)
    for sent in greedy_trans:
        print(' '.join([model.y_embed.getIndex2Word(s) for s in sent]))
    #print('Presumably Beam Search Decoding Translation')
    #beam_trans = BeamDecodingTranslation(seq2seq, to_translate, 3)
    #for sent in beam_trans:
    #    print(' '.join([seq2seq.y_embed.getIndex2Word(s) for s in sent]))




    model = None
    optimizer = None
    loss = None
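
GreedyDecodingTranslation is used above without being shown; the toy sketch below illustrates the general greedy-decoding idea with a hypothetical per-step scoring function (it is not the model's actual API): at each step take the argmax token and feed it back until an end-of-sentence id is produced.

import torch

def greedy_decode(step_fn, bos_id, eos_id, max_len=20):
    tokens = [bos_id]
    for _ in range(max_len):
        logits = step_fn(tokens)            # hypothetical: scores over the vocabulary
        next_id = int(torch.argmax(logits))
        if next_id == eos_id:
            break
        tokens.append(next_id)
    return tokens[1:]

# dummy scorer over a 5-token vocabulary: prefer token 3 twice, then the eos id 4
fake_step = lambda toks: (torch.tensor([0.0, 0.0, 0.0, 1.0, 0.5])
                          if len(toks) < 3 else torch.tensor([0.0, 0.0, 0.0, 0.0, 1.0]))
print(greedy_decode(fake_step, bos_id=0, eos_id=4))  # [3, 3]
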
Exemple #28
0
def main(args):
    # setup logging
    log = get_logger(args.log)
    log(args)

    root_dir = r'D:\projects\trading\mlbootcamp\tickers'
    series_length = 60
    lookback = 50  #160
    input_dim = 1
    train_start_date = datetime.date(2010, 1, 1)
    train_end_date = datetime.date(2016, 1, 1)
    test_start_date = train_end_date
    test_end_date = datetime.date(2019, 1, 1)
    min_sequence_length_train = 2 * (series_length + lookback)
    min_sequence_length_test = 2 * (series_length + lookback)

    dataset_train = create_ticker_dataset(root_dir,
                                          series_length,
                                          lookback,
                                          min_sequence_length_train,
                                          start_date=train_start_date,
                                          end_date=train_end_date)
    dataset_test = create_ticker_dataset(root_dir,
                                         series_length,
                                         lookback,
                                         min_sequence_length_test,
                                         start_date=test_start_date,
                                         end_date=test_end_date)
    dataloader_train = DataLoader(dataset_train,
                                  batch_size=args.mini_batch_size,
                                  shuffle=True,
                                  num_workers=0,
                                  drop_last=True)
    dataloader_test = DataLoader(dataset_test,
                                 batch_size=args.mini_batch_size,
                                 shuffle=False,
                                 num_workers=0,
                                 drop_last=True)

    N_train_data = len(dataset_train)
    N_test_data = len(dataset_test)
    N_mini_batches = N_train_data // args.mini_batch_size
    N_train_time_slices = args.mini_batch_size * N_mini_batches

    print(f'N_train_data: {N_train_data}, N_test_data: {N_test_data}')

    # how often we do validation/test evaluation during training
    val_test_frequency = 50
    # the number of samples we use to do the evaluation
    n_eval_samples = 1

    # instantiate the dmm
    dmm = DMM(input_dim=input_dim,
              rnn_dropout_rate=args.rnn_dropout_rate,
              num_iafs=args.num_iafs,
              iaf_dim=args.iaf_dim,
              use_cuda=args.cuda)

    # setup optimizer
    adam_params = {
        "lr": args.learning_rate,
        "betas": (args.beta1, args.beta2),
        "clip_norm": args.clip_norm,
        "lrd": args.lr_decay,
        "weight_decay": args.weight_decay
    }
    adam = ClippedAdam(adam_params)

    # setup inference algorithm
    elbo = JitTrace_ELBO() if args.jit else Trace_ELBO()
    svi = SVI(dmm.model, dmm.guide, adam, loss=elbo)

    # if checkpoint files provided, load model and optimizer states from disk before we start training
    if args.load_opt != '' and args.load_model != '':
        load_checkpoint(dmm, adam, log)

    #################
    # TRAINING LOOP #
    #################
    times = [time.time()]
    for epoch in range(args.num_epochs):
        print(f'Starting epoch {epoch}.')
        # accumulator for our estimate of the negative log likelihood (or rather -elbo) for this epoch
        epoch_nll = 0.0
        # prepare mini-batch subsampling indices for this epoch
        shuffled_indices = torch.randperm(N_train_data)

        # process each mini-batch; this is where we take gradient steps
        dmm.train()
        for which_mini_batch in range(N_mini_batches):
            print(
                f'Epoch {epoch} of {args.num_epochs}, Batch {which_mini_batch} of {N_mini_batches}.'
            )
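            # note: next(iter(...)) rebuilds the DataLoader iterator on every step,
            # so with shuffle=True this draws an independent random batch rather
            # than walking through the epoch exactly once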
            mini_batch = next(iter(dataloader_train))
            epoch_nll += process_minibatch(svi, epoch, mini_batch,
                                           N_mini_batches, which_mini_batch,
                                           shuffled_indices)

        # if specified, save model and optimizer states to disk every checkpoint_freq epochs
        if 1:  #args.checkpoint_freq > 0 and epoch > 0 and epoch % args.checkpoint_freq == 0:
            save_checkpoint(dmm, adam, log)

        # report training diagnostics
        times.append(time.time())
        epoch_time = times[-1] - times[-2]
        log("[training epoch %04d]  %.4f \t\t\t\t(dt = %.3f sec)" %
            (epoch, epoch_nll / N_train_time_slices, epoch_time))

        # do evaluation on test and validation data and report results
        if val_test_frequency > 0 and epoch > 0 and epoch % val_test_frequency == 0:
            val_nll, test_nll = do_evaluation()
            log("[val/test epoch %04d]  %.4f  %.4f" %
                (epoch, val_nll, test_nll))

        # Testing.
        print(f"Testing epoch {epoch}.")
        dmm.eval()
        mini_batch = next(iter(dataloader_test))
        fig = test_minibatch(dmm, mini_batch, args)
        fig.savefig(f'test_batch_{epoch}.png')
        plt.close(fig)

        # if 1:
        #     fig, _, _ = run_tsne(dmm, dataloader_test)
        #     fig.savefig(f'tsne_{epoch}.png')
        #     plt.close(fig)

    print("Testing")
    if 1:
        dmm.eval()
        mini_batch = next(iter(dataloader_test))

        x, z, x_reconst = test_minibatch(dmm, mini_batch, args)

        n_plots = 5
        fig, axes = plt.subplots(nrows=n_plots, ncols=1)
        for i in range(n_plots):
            input = x[i, :].numpy().squeeze()
            output = x_reconst[i, :]
            axes[i].plot(range(input.shape[0]), input)
            axes[i].plot(range(len(output)), output)
            axes[i].grid()
        fig.savefig(f'test_batch.png')
        plt.close(fig)

    if 1:
        # t-SNE.
        all_z_latents = []
        for test_batch in dataloader_test:
            # z_latents = minibatch_inference(dmm, test_batch)
            # z_latents = encode_x_to_z(dmm, test_batch, sample_z_t=False)
            x, z, x_reconst = test_minibatch(dmm,
                                             test_batch,
                                             args,
                                             sample_z=True)

            all_z_latents.append(z[:, x.shape[1] - 1, :])
        # all_latents = torch.cat(all_z_latents, dim=0)
        all_latents = np.concatenate(all_z_latents, axis=0)

        # Run t-SNE with 2 output dimensions.
        from sklearn.manifold import TSNE
        model_tsne = TSNE(n_components=2, random_state=0)
        # z_states = all_latents.detach().cpu().numpy()
        z_states = all_latents
        z_embed = model_tsne.fit_transform(z_states)
        # Plot t-SNE embedding.
        fig = plt.figure()
        plt.scatter(z_embed[:, 0], z_embed[:, 1], s=10)

        fig.savefig(f'tsne_test.png')
        plt.close(fig)

    return

    # Show some samples surrounding a given point.
    mini_batch = next(iter(dataloader_test))
    # Use the inference network to determine the parameters of the latents.
    z_loc, z_scale = minibatch_latent_parameters(dmm, mini_batch)
    z = sample_latent_sequence(dmm, z_loc, z_scale)

    most_recent_latents = z[:, -1, :]
    # take a handful of random samples in the neighbourhood of the most recent latent
    n_random_samples = 9

    print('Finished')
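
The t-SNE block near the end of the example can be exercised on its own; the sketch below is a minimal standalone version with random stand-in latents (headless matplotlib backend assumed).

import numpy as np
import matplotlib
matplotlib.use("Agg")  # headless backend so savefig works without a display
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE

z_states = np.random.randn(200, 16)  # stand-in for the collected z latents
z_embed = TSNE(n_components=2, random_state=0).fit_transform(z_states)

fig = plt.figure()
plt.scatter(z_embed[:, 0], z_embed[:, 1], s=10)
fig.savefig("tsne_sketch.png")
plt.close(fig)
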
Exemple #29
0
def main(args):
    # setup logging
    log = get_logger(args.log)
    log(args)

    jsb_file_loc = "./data/jsb_processed.pkl"
    # ingest training/validation/test data from disk
    data = pickle.load(open(jsb_file_loc, "rb"))
    training_seq_lengths = data['train']['sequence_lengths']
    training_data_sequences = data['train']['sequences']
    test_seq_lengths = data['test']['sequence_lengths']
    test_data_sequences = data['test']['sequences']
    val_seq_lengths = data['valid']['sequence_lengths']
    val_data_sequences = data['valid']['sequences']
    N_train_data = len(training_seq_lengths)
    N_train_time_slices = float(np.sum(training_seq_lengths))
    N_mini_batches = int(N_train_data / args.mini_batch_size +
                         int(N_train_data % args.mini_batch_size > 0))

    log("N_train_data: %d     avg. training seq. length: %.2f    N_mini_batches: %d"
        % (N_train_data, np.mean(training_seq_lengths), N_mini_batches))

    # how often we do validation/test evaluation during training
    val_test_frequency = 50
    # the number of samples we use to do the evaluation
    n_eval_samples = 1

    # package repeated copies of val/test data for faster evaluation
    # (i.e. set us up for vectorization)
    def rep(x):
        y = np.repeat(x, n_eval_samples, axis=0)
        return y

    # get the validation/test data ready for the dmm: pack into sequences, etc.
    val_seq_lengths = rep(val_seq_lengths)
    test_seq_lengths = rep(test_seq_lengths)
    val_batch, val_batch_reversed, val_batch_mask, val_seq_lengths = poly.get_mini_batch(
        np.arange(n_eval_samples * val_data_sequences.shape[0]),
        rep(val_data_sequences),
        val_seq_lengths,
        cuda=args.cuda)
    test_batch, test_batch_reversed, test_batch_mask, test_seq_lengths = poly.get_mini_batch(
        np.arange(n_eval_samples * test_data_sequences.shape[0]),
        rep(test_data_sequences),
        test_seq_lengths,
        cuda=args.cuda)

    # instantiate the dmm
    dmm = DMM(rnn_dropout_rate=args.rnn_dropout_rate,
              num_iafs=args.num_iafs,
              iaf_dim=args.iaf_dim,
              use_cuda=args.cuda)

    # setup optimizer
    adam_params = {
        "lr": args.learning_rate,
        "betas": (args.beta1, args.beta2),
        "clip_norm": args.clip_norm,
        "lrd": args.lr_decay,
        "weight_decay": args.weight_decay
    }
    adam = ClippedAdam(adam_params)

    # setup inference algorithm
    svi = SVI(dmm.model, dmm.guide, adam, Trace_ELBO())

    # now we're going to define some functions we need to form the main training loop

    # saves the model and optimizer states to disk
    def save_checkpoint():
        log("saving model to %s..." % args.save_model)
        torch.save(dmm.state_dict(),
                   os.path.join('.', 'checkpoints', 'dmm_model.pth'))
        log("saving optimizer states to %s..." % args.save_opt)
        adam.save(os.path.join('.', 'checkpoints', 'dmm_opt.pth'))
        log("done saving model and optimizer checkpoints to disk.")

    # loads the model and optimizer states from disk
    def load_checkpoint():
        assert exists(args.load_opt) and exists(args.load_model), \
            "--load-model and/or --load-opt misspecified"
        log("loading model from %s..." % args.load_model)
        dmm.load_state_dict(
            torch.load(os.path.join('.', 'checkpoints', 'dmm_model.pth')))
        log("loading optimizer states from %s..." % args.load_opt)
        adam.load(os.path.join('.', 'checkpoints', 'dmm_opt.pth'))
        log("done loading model and optimizer states.")

    # prepare a mini-batch and take a gradient step to minimize -elbo
    def process_minibatch(epoch, which_mini_batch, shuffled_indices):
        if args.annealing_epochs > 0 and epoch < args.annealing_epochs:
            # compute the KL annealing factor appropriate for the current mini-batch in the current epoch
            min_af = args.minimum_annealing_factor
            annealing_factor = min_af + (1.0 - min_af) * \
                (float(which_mini_batch + epoch * N_mini_batches + 1) /
                 float(args.annealing_epochs * N_mini_batches))
        else:
            # by default the KL annealing factor is unity
            annealing_factor = 1.0

        # compute which sequences in the training set we should grab
        mini_batch_start = (which_mini_batch * args.mini_batch_size)
        mini_batch_end = np.min([(which_mini_batch + 1) * args.mini_batch_size,
                                 N_train_data])
        mini_batch_indices = shuffled_indices[mini_batch_start:mini_batch_end]

        # grab a fully prepped mini-batch using the helper function in the data loader
        mini_batch, mini_batch_reversed, mini_batch_mask, mini_batch_seq_lengths \
            = poly.get_mini_batch(mini_batch_indices, training_data_sequences,
                                  training_seq_lengths, cuda=args.cuda)
        # do an actual gradient step
        loss = svi.step(mini_batch, mini_batch_reversed, mini_batch_mask,
                        mini_batch_seq_lengths, annealing_factor)
        # keep track of the training loss
        return loss

    # helper function for doing evaluation
    def do_evaluation():
        # put the RNN into evaluation mode (i.e. turn off drop-out if applicable)
        dmm.rnn.eval()

        # compute the validation and test loss n_samples many times
        val_nll = svi.evaluate_loss(val_batch, val_batch_reversed,
                                    val_batch_mask,
                                    val_seq_lengths) / np.sum(val_seq_lengths)
        test_nll = svi.evaluate_loss(
            test_batch, test_batch_reversed, test_batch_mask,
            test_seq_lengths) / np.sum(test_seq_lengths)

        # put the RNN back into training mode (i.e. turn on drop-out if applicable)
        dmm.rnn.train()
        return val_nll, test_nll

    # if checkpoint files provided, load model and optimizer states from disk before we start training
    if args.load_opt != '' and args.load_model != '':
        load_checkpoint()

    #################
    # TRAINING LOOP #
    #################
    desc = "Epoch %4d | Loss %.4f | CUDA enabled" if args.cuda else "Epoch %4d | Loss %.4f"
    pbar = trange(args.num_epochs, desc=desc, unit="epoch")
    # for epoch in range(args.num_epochs):
    for epoch in pbar:
        # if specified, save model and optimizer states to disk every checkpoint_freq epochs
        if args.checkpoint_freq > 0 and epoch > 0 and epoch % args.checkpoint_freq == 0:
            save_checkpoint()

        # accumulator for our estimate of the negative log likelihood (or rather -elbo) for this epoch
        epoch_nll = 0.0
        # prepare mini-batch subsampling indices for this epoch
        shuffled_indices = np.arange(N_train_data)
        np.random.shuffle(shuffled_indices)

        # process each mini-batch; this is where we take gradient steps
        for which_mini_batch in range(N_mini_batches):
            epoch_nll += process_minibatch(epoch, which_mini_batch,
                                           shuffled_indices)

        # report training diagnostics
        pbar.set_description(desc % (epoch, epoch_nll / N_train_time_slices))

        if np.isnan(epoch_nll):
            print("Gradient exploded. Exiting program.")
            quit()

        # do evaluation on test and validation data and report results
        if val_test_frequency > 0 and epoch > 0 and epoch % val_test_frequency == 0:
            val_nll, test_nll = do_evaluation()
            pbar.write("Epoch %04d | Validation Loss %.4f, Test Loss %.4f" %
                       (epoch, val_nll, test_nll))
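
The save_checkpoint/load_checkpoint pair above boils down to two independent mechanisms; the compact sketch below restates them, assuming dmm is a torch.nn.Module and adam is a pyro.optim.ClippedAdam (a PyroOptim, which provides its own save/load for per-parameter optimizer state).

import torch

def save_checkpoint(dmm, adam, model_path="dmm_model.pth", opt_path="dmm_opt.pth"):
    torch.save(dmm.state_dict(), model_path)  # network weights
    adam.save(opt_path)                       # PyroOptim optimizer state

def load_checkpoint(dmm, adam, model_path="dmm_model.pth", opt_path="dmm_opt.pth"):
    dmm.load_state_dict(torch.load(model_path))
    adam.load(opt_path)
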
Exemple #30
0
    # load weights if available
    prev_epoch = 0
    if args.weights is not None:
        try:
            print("Loading trained weights: {}".format(args.weights))
            states = torch.load(args.weights)
            vae.load_state_dict(states['state_dict'])
            prev_epoch = int(states['epoch'])
        except IOError:
            print("File not found: {}".format(args.weights))
            sys.exit(-1)

    # optimizer
    adam_params = {'lr': 1e-6, 'clip_norm': 10, 'weight_decay': opts['w_decay']}
    optimizer = ClippedAdam(adam_params)

    # inference
    svi = SVI(vae.model, vae.guide, optimizer, loss='ELBO')

    # loss
    train_elbo = [None] * args.epochs
    test_elbo = [None] * args.epochs

    # training info
    print("... Training VAE with {} days".format(args.dim))
    with open('./models/vae_log.txt', 'a') as f:
        f.write('=== New run ===\n')

    # training loop
    best_lb = -np.inf