def main(args):
    logging.info('Generating data')
    pyro.set_rng_seed(0)
    pyro.clear_param_store()
    pyro.enable_validation(__debug__)

    # We can generate synthetic data directly by calling the model.
    true_topic_weights, true_topic_words, data = model(args=args)

    # We'll train using SVI.
    logging.info('-' * 40)
    logging.info('Training on {} documents'.format(args.num_docs))
    predictor = make_predictor(args)
    guide = functools.partial(parametrized_guide, predictor)
    Elbo = JitTraceEnum_ELBO if args.jit else TraceEnum_ELBO
    elbo = Elbo(max_plate_nesting=2)
    optim = ClippedAdam({'lr': args.learning_rate})
    svi = SVI(model, guide, optim, elbo)
    logging.info('Step\tLoss')
    for step in range(args.num_steps):
        loss = svi.step(data, args=args, batch_size=args.batch_size)
        if step % 10 == 0:
            logging.info('{: >5d}\t{}'.format(step, loss))
    loss = elbo.loss(model, guide, data, args=args)
    logging.info('final loss = {}'.format(loss))
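# A minimal, self-contained sketch of the SVI-with-ClippedAdam pattern used
# throughout these snippets; the toy model/guide here are illustrative, not
# part of the example above:
import torch
import pyro
import pyro.distributions as dist
from pyro.infer import SVI, Trace_ELBO
from pyro.optim import ClippedAdam


def toy_model(data):
    # prior over a latent location, observed with unit noise
    loc = pyro.sample("loc", dist.Normal(0., 10.))
    with pyro.plate("data", len(data)):
        pyro.sample("obs", dist.Normal(loc, 1.), obs=data)


def toy_guide(data):
    # variational posterior over "loc" with learnable parameters
    q_loc = pyro.param("q_loc", torch.tensor(0.))
    q_scale = pyro.param("q_scale", torch.tensor(1.),
                         constraint=dist.constraints.positive)
    pyro.sample("loc", dist.Normal(q_loc, q_scale))


data = torch.randn(100) + 3.0
svi = SVI(toy_model, toy_guide, ClippedAdam({"lr": 0.01}), Trace_ELBO())
for step in range(1000):
    svi.step(data)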
def fit(self, model_name, model_param_names, data_input, init_values=None):
    # verbose is passed through from orbit.models.base_estimator
    verbose = self.verbose
    message = self.message
    learning_rate = self.learning_rate
    learning_rate_total_decay = self.learning_rate_total_decay
    num_sample = self.num_sample
    seed = self.seed
    num_steps = self.num_steps

    pyro.set_rng_seed(seed)
    Model = get_pyro_model(model_name)  # abstract
    model = Model(data_input)  # concrete

    # Perform stochastic variational inference using an auto guide.
    pyro.clear_param_store()
    guide = AutoLowRankMultivariateNormal(model)
    optim = ClippedAdam({
        "lr": learning_rate,
        "lrd": learning_rate_total_decay ** (1 / num_steps)
    })
    elbo = Trace_ELBO(num_particles=self.num_particles,
                      vectorize_particles=True)
    svi = SVI(model, guide, optim, elbo)

    for step in range(num_steps):
        loss = svi.step()
        if verbose and step % message == 0:
            scale_rms = guide._loc_scale()[1].detach().pow(2).mean().sqrt().item()
            print("step {: >4d} loss = {:0.5g}, scale = {:0.5g}".format(
                step, loss, scale_rms))

    # Extract samples.
    vectorize = pyro.plate("samples", num_sample,
                           dim=-1 - model.max_plate_nesting)
    with pyro.poutine.trace() as tr:
        samples = vectorize(guide)()
    with pyro.poutine.replay(trace=tr.trace):
        samples.update(vectorize(model)())

    # Convert from torch.Tensors to numpy.ndarrays.
    extract = {
        name: value.detach().squeeze().numpy()
        for name, value in samples.items()
    }

    # make sure that model param names are a subset of the extracted keys
    invalid_model_param = set(model_param_names) - set(extract.keys())
    if invalid_model_param:
        raise EstimatorException(
            "Pyro model definition does not contain required parameters")

    # filter out unnecessary keys
    extract = {param: extract[param] for param in model_param_names}

    return extract
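# The "lrd" entry gives per-step multiplicative learning-rate decay; choosing
# lrd = total_decay ** (1 / num_steps), as above, makes the final rate equal
# lr * total_decay. A standalone arithmetic check (illustrative numbers, not
# from the estimator above):
lr, total_decay, num_steps = 0.1, 0.01, 500
lrd = total_decay ** (1 / num_steps)
assert abs(lr * lrd ** num_steps - lr * total_decay) < 1e-12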
def fit(
    self,
    x,
    t,
    y,
    num_epochs=100,
    batch_size=100,
    learning_rate=1e-3,
    learning_rate_decay=0.1,
    weight_decay=1e-4,
    log_every=100,
):
    """
    Train using :class:`~pyro.infer.svi.SVI` with the
    :class:`TraceCausalEffect_ELBO` loss.

    :param ~torch.Tensor x:
    :param ~torch.Tensor t:
    :param ~torch.Tensor y:
    :param int num_epochs: Number of training epochs. Defaults to 100.
    :param int batch_size: Batch size. Defaults to 100.
    :param float learning_rate: Learning rate. Defaults to 1e-3.
    :param float learning_rate_decay: Learning rate decay over all epochs;
        the per-step decay rate will depend on batch size and number of
        epochs such that the initial learning rate will be ``learning_rate``
        and the final learning rate will be
        ``learning_rate * learning_rate_decay``. Defaults to 0.1.
    :param float weight_decay: Weight decay. Defaults to 1e-4.
    :param int log_every: Log loss each this-many steps. If zero, do not log
        loss. Defaults to 100.
    :return: list of epoch losses
    """
    assert x.dim() == 2 and x.size(-1) == self.feature_dim
    assert t.shape == x.shape[:1]
    assert y.shape == x.shape[:1]
    self.whiten = PreWhitener(x)

    dataset = TensorDataset(x, t, y)
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
    logger.info("Training with {} minibatches per epoch".format(len(dataloader)))
    num_steps = num_epochs * len(dataloader)
    optim = ClippedAdam({
        "lr": learning_rate,
        "weight_decay": weight_decay,
        "lrd": learning_rate_decay ** (1 / num_steps),
    })
    svi = SVI(self.model, self.guide, optim, TraceCausalEffect_ELBO())
    losses = []
    for epoch in range(num_epochs):
        for x, t, y in dataloader:
            x = self.whiten(x)
            loss = svi.step(x, t, y, size=len(dataset)) / len(dataset)
            if log_every and len(losses) % log_every == 0:
                logger.debug("step {: >5d} loss = {:0.6g}".format(len(losses), loss))
            assert not torch_isnan(loss)
            losses.append(loss)
    return losses
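# PreWhitener is defined elsewhere in the CEVAE module; a minimal sketch of
# what such a whitener can look like (this version is illustrative, not the
# library's exact implementation):
import torch
import torch.nn as nn


class PreWhitener(nn.Module):
    """Center and scale each feature, using statistics captured at init."""

    def __init__(self, x):
        super().__init__()
        loc = x.mean(0)
        scale = x.std(0)
        scale[~(scale > 0)] = 1.0  # guard against constant features
        self.register_buffer("loc", loc)
        self.register_buffer("inv_scale", 1.0 / scale)

    def forward(self, x):
        return (x - self.loc) * self.inv_scale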
def fit(self, model_name, model_param_names, data_input, fitter=None, init_values=None):
    verbose = self.verbose
    message = self.message
    learning_rate = self.learning_rate
    seed = self.seed
    num_steps = self.num_steps
    learning_rate_total_decay = self.learning_rate_total_decay

    pyro.set_rng_seed(seed)
    if fitter is None:
        fitter = get_pyro_model(model_name)  # abstract
    model = fitter(data_input)  # concrete

    # Perform MAP inference using an AutoDelta guide.
    pyro.clear_param_store()
    guide = AutoDelta(model)
    optim = ClippedAdam({
        "lr": learning_rate,
        "lrd": learning_rate_total_decay ** (1 / num_steps),
        "betas": (0.5, 0.8)
    })
    elbo = Trace_ELBO()
    loss_elbo = list()
    svi = SVI(model, guide, optim, elbo)
    for step in range(num_steps):
        loss = svi.step()
        loss_elbo.append(loss)
        if verbose and step % message == 0:
            print("step {: >4d} loss = {:0.5g}".format(step, loss))

    # Extract point estimates.
    values = guide()
    values.update(pyro.poutine.condition(model, values)())

    # Convert from torch.Tensors to numpy.ndarrays.
    extract = {
        name: value.detach().numpy()
        for name, value in values.items()
    }

    # make sure that model param names are a subset of the extracted keys
    invalid_model_param = set(model_param_names) - set(extract.keys())
    if invalid_model_param:
        raise EstimatorException(
            "Pyro model definition does not contain required parameters")

    # filter out unnecessary keys
    posteriors = {param: extract[param] for param in model_param_names}
    training_metrics = {'loss_elbo': np.array(loss_elbo)}

    return posteriors, training_metrics
def main(args): """ run inference for CVAE :param args: arguments for CVAE :return: None """ if args.seed is not None: set_seed(args.seed, args.cuda) if os.path.exists('cvae.model.pt'): print('Loading model %s' % 'cvae.model.pt') cvae = torch.load('cvae.model.pt') else: cvae = CVAE(z_dim=args.z_dim, y_dim=8, x_dim=32612, hidden_dim=args.hidden_dimension, use_cuda=args.cuda) print(cvae) # setup the optimizer adam_params = { "lr": args.learning_rate, "betas": (args.beta_1, 0.999), "clip_norm": 0.5 } optimizer = ClippedAdam(adam_params) guide = config_enumerate(cvae.guide, args.enum_discrete) # set up the loss for inference. loss = SVI(cvae.model, guide, optimizer, loss=TraceEnum_ELBO(max_iarange_nesting=1)) try: # setup the logger if a filename is provided logger = open(args.logfile, "w") if args.logfile else None data_loaders = setup_data_loaders(NHANES, args.cuda, args.batch_size) print(len(data_loaders['prediction'])) #torch.save(cvae, 'cvae.model.pt') mu, sigma, actuals, lods, masks = get_predictions( data_loaders["prediction"], cvae.sim_measurements) torch.save((mu, sigma, actuals, lods, masks), 'cvae.predictions.pt') finally: # close the logger file object if we opened it earlier if args.logfile: logger.close()
def make_svi(model, guide, args=None, kwargs=None, steps=1000, lr=0.05,
             cut_time=slice(None, None), max_steps=2000,
             ensure_convergence=False, loss='ELBO'):
    adam_params = {
        "lr": lr,
        "betas": (0.90, 0.999),
        'weight_decay': 0.005,
        'clip_norm': 10
    }
    optimizer = ClippedAdam(adam_params)
    # svi = SVI(model, guide, optimizer, loss=trace_mle(cut_time))
    if loss == 'ELBO':
        svi = SVI(model, guide, optimizer, loss=JitTraceEnum_ELBO())
    if loss == 'MLE':
        svi = SVI(model, guide, optimizer, loss=trace_mle())

    pbar = tqdm(range(1, steps + 1))
    time_start = 0
    loss_arr = []
    for i in pbar:
        loss, time_start = make_step(svi, pbar, time_start, args, kwargs)
        loss_arr.append(loss)

    while ensure_convergence:
        std_prev = np.std(loss_arr[-20:-1])
        mean_cur = np.mean(loss_arr[-100:])
        mean_prev = np.mean(loss_arr[-200:-100])
        prob = stat.norm(mean_prev, std_prev).cdf(mean_cur)
        # print(prob, mean_cur, mean_prev, std_prev)
        if mean_cur < mean_prev and prob < 0.05 and len(loss_arr) < max_steps:
            # the loss is still decreasing significantly: run extra steps
            pbar = tqdm(range(1, 100 + 1), leave=False)
            for j in pbar:
                loss, time_start = make_step(svi, pbar, time_start,
                                             args, kwargs, prefix='Extra: ')
                loss_arr.append(loss)
        else:
            break

    return loss
def train(args):
    model = Model(args.dim, 2 * args.rank)
    guide = AutoLowRankMultivariateNormal(model, rank=args.rank, init_scale=0.01)
    optim = ClippedAdam({"lr": args.learning_rate})
    elbo = Trace_ELBO()
    svi = SVI(model, guide, optim, elbo)
    losses = []
    for step in range(args.num_steps):
        loss = svi.step() / args.dim
        losses.append(loss)
        if step % 100 == 0:
            print("step {: >4} loss = {:0.8g}".format(step, loss))
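# Once train() has run, the fitted guide can be queried directly; a short
# sketch of summarizing the posterior (model/guide as in the snippet above,
# using standard Pyro APIs):
from pyro.infer import Predictive

medians = guide.median()  # point summary of the variational posterior
predictive = Predictive(model, guide=guide, num_samples=500)
samples = predictive()    # Monte Carlo draws over all latent sites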
def test_broken_plates_smoke(backend):
    def model():
        with pyro.plate("i", 2):
            a = pyro.sample("a", dist.Normal(0, 1))
        pyro.sample("b", dist.Normal(a.mean(-1), 1), obs=torch.tensor(0.0))

    guide = AutoGaussian(model, backend=backend)
    svi = SVI(model, guide, ClippedAdam({"lr": 1e-8}), Trace_ELBO())
    for step in range(2):
        with xfail_if_not_implemented():
            svi.step()
    guide()
    predictive = Predictive(model, guide=guide, num_samples=2)
    predictive()
def test_pyrocov_smoke(model, Guide, backend):
    T, P, S, F = 3, 4, 5, 6
    dataset = {
        "features": torch.randn(S, F),
        "local_time": torch.randn(T, P),
        "weekly_strains": torch.randn(T, P, S).exp().round(),
    }

    guide = Guide(model, backend=backend)
    svi = SVI(model, guide, ClippedAdam({"lr": 1e-8}), Trace_ELBO())
    for step in range(2):
        with xfail_if_not_implemented():
            svi.step(dataset)
    guide(dataset)
    predictive = Predictive(model, guide=guide, num_samples=2)
    predictive(dataset)
def main(args):
    pyro.set_rng_seed(0)
    pyro.clear_param_store()
    pyro.enable_validation(__debug__)

    # Loading data
    corpora = prepro_file_load("corpora")
    documents = list(prepro_file_load("id2pre_text").values())
    documents = [re.sub(r"[\[\]',]", "", doc).split() for doc in documents]
    data = [torch.tensor(list(filter(lambda a: a != -1, corpora.doc2idx(doc))),
                         dtype=torch.int64) for doc in documents]
    N = list(map(len, data))
    args.num_words = len(corpora)
    args.num_docs = len(data)

    # We'll train using SVI.
    logging.info('Training on {} documents'.format(args.num_docs))
    predictor = make_predictor(args)
    guide = functools.partial(parametrized_guide, predictor)
    Elbo = JitTraceEnum_ELBO if args.jit else Trace_ELBO
    elbo = Elbo(max_plate_nesting=2)
    optim = ClippedAdam({'lr': args.learning_rate})
    svi = SVI(model, guide, optim, elbo)
    losses = []
    logging.info('Step\tLoss')
    for step in tqdm(range(args.num_steps)):
        loss = svi.step(data, N, args=args)
        losses.append(loss)
        if step % 10 == 0:
            # logging.info('{: >5d}\t{}'.format(step, loss))
            logging.info(f"Loss: {loss}")
    loss = elbo.loss(model, guide, data, N, args=args)
    logging.info('final loss = {}'.format(loss))

    # Plot loss over iterations
    plt.plot(losses)
    plt.title("ELBO")
    plt.xlabel("step")
    plt.ylabel("loss")
    plot_file_name = "../loss-2017_variable-sizes_only-word-data.png"
    plt.savefig(plot_file_name)
    plt.show()

    # save model
    torch.save({"model": predictor.state_dict(), "guide": guide}, "../mymodel.pt")
    pyro.get_param_store().save("mymodelparams.pt")
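# The saved artifacts can be restored later; a minimal sketch using the same
# file names (make_predictor and args mirror the snippet above):
import torch
import pyro

checkpoint = torch.load("../mymodel.pt")
predictor = make_predictor(args)  # same constructor as in training
predictor.load_state_dict(checkpoint["model"])
guide = checkpoint["guide"]
pyro.get_param_store().load("mymodelparams.pt")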
def train_model(pmf):
    def model(data):
        # sample f from the prior; probabilities are generated by the pmf
        f = pyro.sample("latent_fairness", pmf)
        f2 = dist.Bernoulli(f)
        for i in range(len(data)):
            s = pyro.sample("obs_{}".format(i), f2, obs=data[i])

    def guide(data):
        alpha_q = pyro.param("alpha_q", torch.tensor(15.0),
                             constraint=constraints.positive)
        beta_q = pyro.param("beta_q", torch.tensor(15.0),
                            constraint=constraints.positive)
        # sample latent_fairness from the distribution Beta(alpha_q, beta_q)
        pyro.sample("latent_fairness", dist.Beta(alpha_q, beta_q))

    adam_params = {"lr": 0.0005, "betas": (0.90, 0.999)}
    optimizer = ClippedAdam(adam_params)
    svi = SVI(model, guide, optimizer, loss=Trace_ELBO())

    for step in range(n_steps):
        loss = svi.step(data)
        if step % 100 == 0:
            logging.info(".")
            logging.info("Elbo loss: {}".format(loss))

    # grab the learned variational parameters
    a_q = pyro.param("alpha_q").item()
    b_q = pyro.param("beta_q").item()
    inferred_mean = a_q / (a_q + b_q)
    # compute inferred standard deviation
    factor = b_q / (a_q * (1.0 + a_q + b_q))
    inferred_std = inferred_mean * math.sqrt(factor)

    print("\nbased on the data and our prior belief, the fairness "
          + "of the coin is %.3f +- %.3f" % (inferred_mean, inferred_std))

    beta_posterior = torch.distributions.beta.Beta(a_q, b_q)
    posterior = torch.distributions.bernoulli.Bernoulli(beta_posterior.sample())
    logging.info("Sampling:{}".format(posterior.sample()))
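# The closed-form summary above matches the Beta distribution's moments:
# mean a/(a+b) and variance ab/((a+b)^2 (a+b+1)). A quick standalone check
# against torch.distributions (illustrative values):
import math
import torch

a_q, b_q = 15.0, 15.0
mean = a_q / (a_q + b_q)
std = mean * math.sqrt(b_q / (a_q * (1.0 + a_q + b_q)))
d = torch.distributions.Beta(a_q, b_q)
assert abs(mean - d.mean.item()) < 1e-6
assert abs(std - d.stddev.item()) < 1e-6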
def main(args):
    logging.info('Generating data')
    # WL: commented. =====
    # pyro.set_rng_seed(0)
    # ====================
    pyro.clear_param_store()
    pyro.enable_validation(__debug__)

    # We can generate synthetic data directly by calling the model.
    true_topic_weights, true_topic_words, data = model(args=args)

    # We'll train using SVI.
    logging.info('-' * 40)
    logging.info('Training on {} documents'.format(args.num_docs))
    predictor = make_predictor(args)
    guide = functools.partial(parametrized_guide, predictor)
    Elbo = JitTraceEnum_ELBO if args.jit else TraceEnum_ELBO
    elbo = Elbo(max_plate_nesting=2)
    optim = ClippedAdam({'lr': args.learning_rate})
    svi = SVI(model, guide, optim, elbo)

    # WL: edited. =====
    # logging.info('Step\tLoss')
    logging.info(args)
    times = [time.time()]
    logging.info('\nstep\t' + 'epoch\t' + 'elbo\t' + 'time(sec)')
    # =================

    # WL: edited. =====
    # for step in range(args.num_steps):
    for step in range(1, args.num_steps + 1):
        # =================
        loss = svi.step(data, args=args, batch_size=args.batch_size)
        # WL: edited. =====
        # if step % 10 == 0:
        #     logging.info('{: >5d}\t{}'.format(step, loss))
        if (step % 10 == 0) or (step == 1):
            times.append(time.time())
            logging.info(f'{step:06d}\t'
                         f'{(step * args.batch_size) / args.num_docs:.3f}\t'
                         f'{-loss:.4f}\t'
                         f'{times[-1] - times[-2]:.3f}')
def train_model(data, n_steps, pmf):
    def model(data):
        mu = 2.8
        num_sigmas = 4
        sigma = 0.3
        low = mu - num_sigmas * sigma
        high = mu + num_sigmas * sigma
        # sample f from the prior
        f = pyro.sample("latent", dist.Uniform(low, high))
        print(f)
        # Probabilities are generated by the pmf
        for i in range(len(data)):
            pyro.sample("obs_{}".format(i), Poisson(f), obs=data[i])

    def guide(data):
        lam = pyro.param("lam", torch.tensor(2.0),
                         constraint=constraints.positive)
        # alpha_q = pyro.param("alpha_q", torch.tensor(2.0))
        # beta_q = pyro.param("beta_q", torch.tensor(1.0))
        # use the learnable rate (the original passed a fixed 2.0, leaving
        # "lam" untrained); note that a Poisson guide is discrete while the
        # Uniform prior above is continuous, so the supports do not match
        pyro.sample("latent", dist.Poisson(lam))

    adam_params = {"lr": 0.0005, "betas": (0.90, 0.999)}
    optimizer = ClippedAdam(adam_params)
    svi = SVI(model, guide, optimizer, loss=Trace_ELBO())

    for step in range(n_steps):
        loss = svi.step(data)
        if step % 100 == 0:
            logging.info(".")
            logging.info("Elbo loss: {}".format(loss))

    # grab the learned variational parameters
    lam = pyro.param("lam").item()
    print(lam)
    # a_q = pyro.param("alpha_q").item()
    # b_q = pyro.param("beta_q").item()
    # print(a_q, b_q)
    posterior = Poisson(lam)
    logging.info("Sampling:{}".format(posterior.sample()))
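# A support-matching alternative would keep the guide inside the Uniform
# prior's interval; a minimal MAP-style sketch (bounds mirror the model
# above; "f_map" is an illustrative parameter name):
import torch
import pyro
import pyro.distributions as dist
from torch.distributions import constraints


def map_guide(data):
    low, high = 2.8 - 4 * 0.3, 2.8 + 4 * 0.3  # same bounds as the model
    f_map = pyro.param("f_map", torch.tensor(2.8),
                       constraint=constraints.interval(low, high))
    # point-mass guide: SVI then performs MAP estimation of "latent"
    pyro.sample("latent", dist.Delta(f_map))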
def test_intractable_smoke(backend):
    def model():
        i_plate = pyro.plate("i", 2, dim=-1)
        j_plate = pyro.plate("j", 3, dim=-2)
        with i_plate:
            a = pyro.sample("a", dist.Normal(0, 1))
        with j_plate:
            b = pyro.sample("b", dist.Normal(0, 1))
        with i_plate, j_plate:
            c = pyro.sample("c", dist.Normal(a + b, 1))
        pyro.sample("d", dist.Normal(c, 1), obs=torch.zeros(3, 2))

    guide = AutoGaussian(model, backend=backend)
    svi = SVI(model, guide, ClippedAdam({"lr": 1e-8}), Trace_ELBO())
    for step in range(2):
        with xfail_if_not_implemented():
            svi.step()
    guide()
    predictive = Predictive(model, guide=guide, num_samples=2)
    predictive()
def run_inference(model, guide, home_id, away_id, score1, score2, args):
    gamma = 0.01  # final learning rate will be gamma * initial_lr
    lrd = gamma ** (1 / args.num_iterations)
    svi = SVI(
        model=model,
        guide=guide,
        optim=ClippedAdam({
            "lr": args.learning_rate,
            "lrd": lrd
        }),
        loss=Trace_ELBO(num_particles=args.num_particles),
    )
    pyro.clear_param_store()  # clear global parameter cache
    pyro.set_rng_seed(args.rng_seed)
    advi_loss = []
    for j in range(args.num_iterations):
        # calculate the loss and take a gradient step
        loss = svi.step(
            home_id=home_id,
            away_id=away_id,
            score1_obs=score1,
            score2_obs=score2,
        )
        advi_loss.append(loss)
        if j % 100 == 0:
            print("[iteration %4d] loss: %.4f" % (j + 1, loss))

    print("Posterior: ")
    for i in pyro.get_param_store().items():
        print(i)

    fit = Predictive(model=model, guide=guide,
                     num_samples=2000)(home_id=home_id, away_id=away_id)
    return fit
def __init__(self, _c: "VAEConfig"): super().__init__() self._c = _c self.image_flatten_dim = _c.image_dim[0] * _c.image_dim[1] adam_params = { "lr": _c.init_lr, "betas": (0.96, 0.999), "clip_norm": 10.0, "lrd": 0.99996, "weight_decay": 2.0 } self.optimizer = ClippedAdam(adam_params) self.emitter = Decoder(_c.z_dim, _c.emitter_channel, dropout_p=_c.dropout_rate) self.trans = GatedTransition(_c.z_dim, _c.transition_dim) self.combiner = Combiner(_c.z_dim, _c.rnn_dim) self.crnn = ConvRNN(_c.image_dim, _c.rnn_dim, _c.rnn_layers, _c.dropout_rate, use_lstm=_c.use_lstm, channels=_c.crnn_channel) self.iafs = [ affine_autoregressive(_c.z_dim, hidden_dims=[_c.iaf_dim]) for _ in range(_c.num_iafs) ] self.iafs_modules = nn.ModuleList(self.iafs) self.z_0 = nn.Parameter(torch.zeros(_c.z_dim)) self.z_q_0 = nn.Parameter(torch.zeros(_c.z_dim)) self.h_0 = nn.Parameter(torch.zeros(1, 1, _c.rnn_dim)) if _c.use_lstm: self.c_0 = nn.Parameter(torch.zeros(1, 1, _c.rnn_dim)) self.cuda()
def run_svi(self, n_steps=100, num_particles=10, clear_params=False):
    if not clear_params:
        pyro.clear_param_store()

    opt = ClippedAdam(self.optimizer_params)
    svi = SVI(self.model, self.guide, opt,
              loss=Trace_ELBO(num_particles=num_particles))

    loss = []
    pred_prob = []
    valid_prob = []
    for step in range(n_steps):
        curr_loss = svi.step(self.data)
        prob = self.calc_log_sum(self.data, num_particles)
        valid_p = self.calc_log_sum(self.valid_data, num_particles)
        loss.append(curr_loss)
        pred_prob.append(prob)
        valid_prob.append(valid_p)
        if step % (n_steps // 20) == 0:
            message = '{:.0%} ({:.1f}) ({:.1f}) ({:.1f})'.format(
                step / n_steps, curr_loss, prob, valid_p)
            print(message, end=' | ')
    return loss, pred_prob, valid_prob
def train(args, dataset): """ Train a model and guide to fit a dataset. """ counts = dataset["counts"] num_stations = len(dataset["stations"]) logging.info( "Training on {} stations over {} hours, {} batches/epoch".format( num_stations, len(counts), int(math.ceil(len(counts) / args.batch_size)))) time_features = make_time_features(args, 0, len(counts)) control_features = (counts.max(1)[0] + counts.max(2)[0]).clamp(max=1) logging.info( "On average {:0.1f}/{} stations are open at any one time".format( control_features.sum(-1).mean(), num_stations)) features = torch.cat([time_features, control_features], -1) feature_dim = features.size(-1) logging.info("feature_dim = {}".format(feature_dim)) metadata = {"args": args, "losses": [], "control": control_features} torch.save(metadata, args.training_filename) def optim_config(module_name, param_name): config = { "lr": args.learning_rate, "betas": (0.8, 0.99), "weight_decay": 0.01**(1 / args.num_steps), } if param_name == "init_scale": config["lr"] *= 0.1 # init_dist sees much less data per minibatch return config training_counts = counts[:args.truncate] if args.truncate else counts data_size = len(training_counts) model = Model(args, features, training_counts) guide = Guide(args, features, training_counts) elbo = Trace_ELBO() optim = ClippedAdam(optim_config) svi = SVI(model, guide, optim, elbo) losses = [] forecaster = None for step in range(args.num_steps): begin_time = torch.randint(max(1, data_size - args.batch_size), ()).item() end_time = min(data_size, begin_time + args.batch_size) feature_batch = features[begin_time:end_time] counts_batch = counts[begin_time:end_time] loss = svi.step(feature_batch, counts_batch) / counts_batch.numel() assert math.isfinite(loss), loss losses.append(loss) logging.debug("step {} loss = {:0.4g}".format(step, loss)) if step % 20 == 0: # Save state every few steps. pyro.get_param_store().save(args.param_store_filename) metadata = { "args": args, "losses": losses, "control": control_features } torch.save(metadata, args.training_filename) forecaster = Forecaster(args, dataset, features, model, guide) torch.save(forecaster, args.forecaster_filename) if logging.Logger(None).isEnabledFor(logging.DEBUG): init_scale = pyro.param("init_scale").data trans_scale = pyro.param("trans_scale").data trans_matrix = pyro.param("trans_matrix").data eigs = trans_matrix.eig()[0].norm(dim=-1).sort( descending=True).values logging.debug("guide.diag_part = {}".format( guide.diag_part.data.squeeze())) logging.debug( "init scale min/mean/max: {:0.3g} {:0.3g} {:0.3g}".format( init_scale.min(), init_scale.mean(), init_scale.max())) logging.debug( "trans scale min/mean/max: {:0.3g} {:0.3g} {:0.3g}".format( trans_scale.min(), trans_scale.mean(), trans_scale.max())) logging.debug("trans mat eig:\n{}".format(eigs)) return forecaster
def main(args):
    ## do-re-mi scale
    def easyTones():
        training_seq_lengths = torch.tensor([8] * 1)
        training_data_sequences = torch.zeros(1, 8, 88)
        for i in range(1):
            training_data_sequences[i][0][int(70 - i * 10)] = 1
            training_data_sequences[i][1][int(70 - i * 10) + 2] = 1
            training_data_sequences[i][2][int(70 - i * 10) + 4] = 1
            training_data_sequences[i][3][int(70 - i * 10) + 5] = 1
            training_data_sequences[i][4][int(70 - i * 10) + 7] = 1
            training_data_sequences[i][5][int(70 - i * 10) + 9] = 1
            training_data_sequences[i][6][int(70 - i * 10) + 11] = 1
            training_data_sequences[i][7][int(70 - i * 10) + 12] = 1
        return training_seq_lengths, training_data_sequences

    def superEasyTones():
        training_seq_lengths = torch.tensor([8] * 10)
        training_data_sequences = torch.zeros(10, 8, 88)
        for i in range(10):
            for j in range(8):
                training_data_sequences[i][j][int(30 + i * 5)] = 1
        return training_seq_lengths, training_data_sequences

    ## the same note repeated: do-do-do, do-do-do, do-do-do
    def easiestTones():
        training_seq_lengths = torch.tensor([8] * 10)
        training_data_sequences = torch.zeros(10, 8, 88)
        for i in range(10):
            for j in range(8):
                training_data_sequences[i][j][int(70)] = 1
        return training_seq_lengths, training_data_sequences

    # setup logging
    logging.basicConfig(level=logging.DEBUG, format='%(message)s',
                        filename=args.log, filemode='w')
    console = logging.StreamHandler()
    console.setLevel(logging.INFO)
    logging.getLogger('').addHandler(console)
    logging.info(args)

    data = poly.load_data(poly.JSB_CHORALES)
    training_seq_lengths = data['train']['sequence_lengths']
    training_data_sequences = data['train']['sequences']
    training_seq_lengths, training_data_sequences = easiestTones()
    test_seq_lengths = data['test']['sequence_lengths']
    test_data_sequences = data['test']['sequences']
    test_seq_lengths, test_data_sequences = easiestTones()
    val_seq_lengths = data['valid']['sequence_lengths']
    val_data_sequences = data['valid']['sequences']
    val_seq_lengths, val_data_sequences = easiestTones()

    N_train_data = len(training_seq_lengths)
    N_train_time_slices = float(torch.sum(training_seq_lengths))
    N_mini_batches = int(N_train_data / args.mini_batch_size +
                         int(N_train_data % args.mini_batch_size > 0))

    logging.info("N_train_data: %d     avg. training seq. length: %.2f     N_mini_batches: %d" %
                 (N_train_data, training_seq_lengths.float().mean(), N_mini_batches))

    # how often we do validation/test evaluation during training
    val_test_frequency = 50
    # the number of samples we use to do the evaluation
    n_eval_samples = 1

    # package repeated copies of val/test data for faster evaluation
    # (i.e. set us up for vectorization)
    def rep(x):
        rep_shape = torch.Size([x.size(0) * n_eval_samples]) + x.size()[1:]
        repeat_dims = [1] * len(x.size())
        repeat_dims[0] = n_eval_samples
        return x.repeat(repeat_dims).reshape(n_eval_samples, -1).transpose(1, 0).reshape(rep_shape)

    # get the validation/test data ready for the dmm: pack into sequences, etc.
    val_seq_lengths = rep(val_seq_lengths)
    test_seq_lengths = rep(test_seq_lengths)
    val_batch, val_batch_reversed, val_batch_mask, val_seq_lengths = poly.get_mini_batch(
        torch.arange(n_eval_samples * val_data_sequences.shape[0]),
        rep(val_data_sequences),
        val_seq_lengths, cuda=args.cuda)
    test_batch, test_batch_reversed, test_batch_mask, test_seq_lengths = poly.get_mini_batch(
        torch.arange(n_eval_samples * test_data_sequences.shape[0]),
        rep(test_data_sequences),
        test_seq_lengths, cuda=args.cuda)

    # instantiate the dmm
    dmm = DMM(rnn_dropout_rate=args.rnn_dropout_rate, num_iafs=args.num_iafs,
              iaf_dim=args.iaf_dim, use_cuda=args.cuda)

    # setup optimizer
    adam_params = {"lr": args.learning_rate, "betas": (args.beta1, args.beta2),
                   "clip_norm": args.clip_norm, "lrd": args.lr_decay,
                   "weight_decay": args.weight_decay}
    adam = ClippedAdam(adam_params)

    # setup inference algorithm
    if args.tmc:
        if args.jit:
            raise NotImplementedError("no JIT support yet for TMC")
        tmc_loss = TraceTMC_ELBO()
        dmm_guide = config_enumerate(dmm.guide, default="parallel",
                                     num_samples=args.tmc_num_samples, expand=False)
        svi = SVI(dmm.model, dmm_guide, adam, loss=tmc_loss)
    elif args.tmcelbo:
        if args.jit:
            raise NotImplementedError("no JIT support yet for TMC ELBO")
        elbo = TraceEnum_ELBO()
        dmm_guide = config_enumerate(dmm.guide, default="parallel",
                                     num_samples=args.tmc_num_samples, expand=False)
        svi = SVI(dmm.model, dmm_guide, adam, loss=elbo)
    else:
        elbo = JitTrace_ELBO() if args.jit else Trace_ELBO()
        svi = SVI(dmm.model, dmm.guide, adam, loss=elbo)

    # now we're going to define some functions we need to form the main training loop

    # saves the model and optimizer states to disk
    def save_checkpoint():
        logging.info("saving model to %s..." % args.save_model)
        torch.save(dmm.state_dict(), args.save_model)
        # logging.info("saving optimizer states to %s..." % args.save_opt)
        # adam.save(args.save_opt)
        logging.info("done saving model and optimizer checkpoints to disk.")

    # loads the model and optimizer states from disk
    def load_checkpoint():
        assert exists(args.load_opt) and exists(args.load_model), \
            "--load-model and/or --load-opt misspecified"
        logging.info("loading model from %s..." % args.load_model)
        dmm.load_state_dict(torch.load(args.load_model))
        logging.info("loading optimizer states from %s..." % args.load_opt)
        adam.load(args.load_opt)
        logging.info("done loading model and optimizer states.")

    # prepare a mini-batch and take a gradient step to minimize -elbo
    def process_minibatch(epoch, which_mini_batch, shuffled_indices):
        if args.annealing_epochs > 0 and epoch < args.annealing_epochs:
            # compute the KL annealing factor appropriate for the current mini-batch in the current epoch
            min_af = args.minimum_annealing_factor
            annealing_factor = min_af + (1.0 - min_af) * \
                (float(which_mini_batch + epoch * N_mini_batches + 1) /
                 float(args.annealing_epochs * N_mini_batches))
        else:
            # by default the KL annealing factor is unity
            annealing_factor = 1.0

        # compute which sequences in the training set we should grab
        mini_batch_start = (which_mini_batch * args.mini_batch_size)
        mini_batch_end = np.min([(which_mini_batch + 1) * args.mini_batch_size, N_train_data])
        mini_batch_indices = shuffled_indices[mini_batch_start:mini_batch_end]
        # grab a fully prepped mini-batch using the helper function in the data loader
        mini_batch, mini_batch_reversed, mini_batch_mask, mini_batch_seq_lengths \
            = poly.get_mini_batch(mini_batch_indices, training_data_sequences,
                                  training_seq_lengths, cuda=args.cuda)
        # do an actual gradient step
        loss = svi.step(mini_batch, mini_batch_reversed, mini_batch_mask,
                        mini_batch_seq_lengths, annealing_factor)
        # keep track of the training loss
        return loss

    # helper function for doing evaluation
    def do_evaluation():
        # put the RNN into evaluation mode (i.e. turn off drop-out if applicable)
        dmm.rnn.eval()
        # compute the validation and test loss n_samples many times
        val_nll = svi.evaluate_loss(val_batch, val_batch_reversed, val_batch_mask,
                                    val_seq_lengths) / float(torch.sum(val_seq_lengths))
        test_nll = svi.evaluate_loss(test_batch, test_batch_reversed, test_batch_mask,
                                     test_seq_lengths) / float(torch.sum(test_seq_lengths))
        # put the RNN back into training mode (i.e. turn on drop-out if applicable)
        dmm.rnn.train()
        return val_nll, test_nll

    # if checkpoint files provided, load model and optimizer states from disk before we start training
    if args.load_opt != '' and args.load_model != '':
        load_checkpoint()

    #################
    # TRAINING LOOP #
    #################
    times = [time.time()]
    for epoch in range(args.num_epochs):
        # if specified, save model and optimizer states to disk every checkpoint_freq epochs
        if args.checkpoint_freq > 0 and epoch > 0 and epoch % args.checkpoint_freq == 0:
            save_checkpoint()

        # accumulator for our estimate of the negative log likelihood (or rather -elbo) for this epoch
        epoch_nll = 0.0
        # prepare mini-batch subsampling indices for this epoch
        shuffled_indices = torch.randperm(N_train_data)

        # process each mini-batch; this is where we take gradient steps
        for which_mini_batch in range(N_mini_batches):
            epoch_nll += process_minibatch(epoch, which_mini_batch, shuffled_indices)

        # report training diagnostics
        times.append(time.time())
        epoch_time = times[-1] - times[-2]
        logging.info("[training epoch %04d]  %.4f \t\t\t\t(dt = %.3f sec)" %
                     (epoch, epoch_nll / N_train_time_slices, epoch_time))

        # do evaluation on test and validation data and report results
        if val_test_frequency > 0 and epoch > 0 and epoch % val_test_frequency == 0:
            val_nll, test_nll = do_evaluation()
            logging.info("[val/test epoch %04d]  %.4f  %.4f" % (epoch, val_nll, test_nll))
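# The annealing schedule in process_minibatch ramps the KL weight linearly
# from minimum_annealing_factor up to 1.0 over the annealing period. The same
# arithmetic as a standalone sketch (illustrative values):
def annealing_factor(step, total_annealing_steps, min_af=0.2):
    # linear ramp from min_af to 1.0, then constant at 1.0
    if step < total_annealing_steps:
        return min_af + (1.0 - min_af) * (step + 1) / total_annealing_steps
    return 1.0


schedule = [annealing_factor(s, 100) for s in range(120)]
assert abs(schedule[99] - 1.0) < 1e-9 and schedule[119] == 1.0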
# Convert the data into tensors
X_train_torch = torch.tensor(X_train_scaled)
y_train_torch = torch.tensor(y_train_scaled)

pyro.clear_param_store()

# Provide a guide which fits a pre-defined distribution over each
# hidden parameter. The AutoDiagonalNormal guide fits a normal
# distribution over each coefficient and our rate parameter
my_guide = AutoDiagonalNormal(model_gamma)

# Initialize the SVI optimization class
my_svi = SVI(model=model_gamma,
             guide=my_guide,
             optim=ClippedAdam({"lr": 0.01, 'clip_norm': 1.0}),
             loss=Trace_ELBO())

losses = []
start_time = time.time()

# Perform optimization
for i in range(5000):
    loss = my_svi.step(X_train_torch, y_train_torch, california.feature_names)
    normalized_loss = loss / X_train_torch.shape[0]
    losses.append(normalized_loss)
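# After fitting, an AutoDiagonalNormal guide can be summarized directly; a
# short sketch using Pyro's auto-guide quantile API (site names depend on
# model_gamma, which is defined elsewhere):
quantiles = my_guide.quantiles([0.05, 0.5, 0.95])
for site, values in quantiles.items():
    print(site, values)  # posterior median and central 90% interval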
def main(args):
    # Init tensorboard
    writer = SummaryWriter('./runs/' + args.runname + str(args.trialnumber))
    model_name = 'VanillaDMM'

    # Set evaluation log file
    evaluation_logpath = './logs/{}/evaluation_result.log'.format(model_name.lower())
    log_evaluation(evaluation_logpath,
                   'Evaluation Trial - {}\n'.format(args.trialnumber))

    # Constants
    time_length = 30
    input_length_for_pred = 20
    pred_length = time_length - input_length_for_pred
    train_batch_size = 16
    valid_batch_size = 1

    # For model
    input_channels = 1
    z_channels = 50
    emission_channels = [64, 32]
    transition_channels = 64
    encoder_channels = [32, 64]
    rnn_input_dim = 256
    rnn_channels = 128
    kernel_size = 3
    pred_length = 0

    # Device checking
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda:0" if use_cuda else "cpu")

    # Make dataset
    logging.info("Generate data")
    train_datapath = args.datapath / 'train'
    valid_datapath = args.datapath / 'valid'
    train_dataset = DiffusionDataset(train_datapath)
    valid_dataset = DiffusionDataset(valid_datapath)

    # Create data loaders from pickle data
    logging.info("Generate data loaders")
    train_dataloader = DataLoader(train_dataset, batch_size=train_batch_size,
                                  shuffle=True, num_workers=8)
    valid_dataloader = DataLoader(valid_dataset, batch_size=valid_batch_size,
                                  num_workers=4)

    # Training parameters
    width = 100
    height = 100
    input_dim = width * height

    # Create model
    logging.warning("Generate model")
    logging.warning(input_dim)
    pred_input_dim = 10
    dmm = DMM(input_channels=input_channels, z_channels=z_channels,
              emission_channels=emission_channels,
              transition_channels=transition_channels,
              encoder_channels=encoder_channels, rnn_input_dim=rnn_input_dim,
              rnn_channels=rnn_channels, kernel_size=kernel_size,
              height=height, width=width, pred_input_dim=pred_input_dim,
              num_layers=1, rnn_dropout_rate=0.0, num_iafs=0, iaf_dim=50,
              use_cuda=use_cuda)

    # Initialize model
    logging.info("Initialize model")
    epochs = args.endepoch
    learning_rate = 0.0001
    beta1 = 0.9
    beta2 = 0.999
    clip_norm = 10.0
    lr_decay = 1.0
    weight_decay = 0
    adam_params = {"lr": learning_rate, "betas": (beta1, beta2),
                   "clip_norm": clip_norm, "lrd": lr_decay,
                   "weight_decay": weight_decay}
    adam = ClippedAdam(adam_params)
    elbo = Trace_ELBO()
    svi = SVI(dmm.model, dmm.guide, adam, loss=elbo)

    # saves the model and optimizer states to disk
    save_model = Path('./checkpoints/' + model_name)

    def save_checkpoint(epoch):
        save_dir = save_model / '{}.model'.format(epoch)
        save_opt_dir = save_model / '{}.opt'.format(epoch)
        logging.info("saving model to %s..." % save_dir)
        torch.save(dmm.state_dict(), save_dir)
        logging.info("saving optimizer states to %s..." % save_opt_dir)
        adam.save(save_opt_dir)
        logging.info("done saving model and optimizer checkpoints to disk.")

    # Starting epoch
    start_epoch = args.startepoch

    # loads the model and optimizer states from disk
    if start_epoch != 0:
        load_opt = './checkpoints/' + model_name + \
            '/e{}-i188-opt-tn{}.opt'.format(start_epoch - 1, args.trialnumber)
        load_model = './checkpoints/' + model_name + \
            '/e{}-i188-tn{}.pt'.format(start_epoch - 1, args.trialnumber)

        def load_checkpoint():
            # assert exists(load_opt) and exists(load_model), \
            #     "--load-model and/or --load-opt misspecified"
            logging.info("loading model from %s..." % load_model)
            dmm.load_state_dict(torch.load(load_model, map_location=device))
            # logging.info("loading optimizer states from %s..." % load_opt)
            # adam.load(load_opt)
            # logging.info("done loading model and optimizer states.")

        if load_model != '':
            logging.info('Load checkpoint')
            load_checkpoint()

    # Validation only?
    validation_only = args.validonly

    # Train the model
    if not validation_only:
        logging.info("Training model")
        annealing_epochs = 1000
        minimum_annealing_factor = 0.2
        N_train_size = 3000
        N_mini_batches = int(N_train_size / train_batch_size +
                             int(N_train_size % train_batch_size > 0))
        for epoch in tqdm(range(start_epoch, epochs), desc='Epoch', leave=True):
            r_loss_train = 0
            dmm.train(True)
            idx = 0
            mov_avg_loss = 0
            mov_data_len = 0
            for which_mini_batch, data in enumerate(tqdm(train_dataloader, desc='Train', leave=True)):
                if annealing_epochs > 0 and epoch < annealing_epochs:
                    # compute the KL annealing factor appropriate for the current mini-batch in the current epoch
                    min_af = minimum_annealing_factor
                    annealing_factor = min_af + (1.0 - min_af) * \
                        (float(which_mini_batch + epoch * N_mini_batches + 1) /
                         float(annealing_epochs * N_mini_batches))
                else:
                    # by default the KL annealing factor is unity
                    annealing_factor = 1.0

                data['observation'] = normalize(data['observation'].unsqueeze(2).to(device))
                batch_size, length, _, w, h = data['observation'].shape
                data_reversed = reverse_sequences(data['observation'])
                data_mask = torch.ones(batch_size, length, input_channels, w, h).cuda()

                loss = svi.step(data['observation'], data_reversed, data_mask,
                                annealing_factor)

                # Running losses
                mov_avg_loss += loss
                mov_data_len += batch_size
                r_loss_train += loss
                idx += 1

            # Average losses
            train_loss_avg = r_loss_train / (len(train_dataset) * time_length)
            writer.add_scalar('Loss/train', train_loss_avg, epoch)
            logging.info("Epoch: %d, Training loss: %1.5f", epoch, train_loss_avg)

            # Time to time evaluation
            if epoch == epochs - 1:
                for temp_pred_length in [20]:
                    r_loss_valid = 0
                    r_loss_loc_valid = 0
                    r_loss_scale_valid = 0
                    r_loss_latent_valid = 0
                    dmm.train(False)
                    val_pred_length = temp_pred_length
                    val_pred_input_length = 10
                    with torch.no_grad():
                        for i, data in enumerate(tqdm(valid_dataloader, desc='Eval', leave=True)):
                            data['observation'] = normalize(data['observation'].unsqueeze(2).to(device))
                            batch_size, length, _, w, h = data['observation'].shape
                            data_reversed = reverse_sequences(data['observation'])
                            data_mask = torch.ones(batch_size, length, input_channels, w, h).cuda()

                            pred_tensor = data['observation'][:, :input_length_for_pred, :, :, :]
                            pred_tensor_reversed = reverse_sequences(pred_tensor)
                            pred_tensor_mask = torch.ones(
                                batch_size, input_length_for_pred, input_channels, w, h).cuda()

                            ground_truth = data['observation'][:, input_length_for_pred:, :, :, :]

                            val_nll = svi.evaluate_loss(data['observation'],
                                                        data_reversed, data_mask)

                            preds, _, loss_loc, loss_scale = do_prediction_rep_inference(
                                dmm, pred_tensor_mask, val_pred_length,
                                val_pred_input_length, data['observation'])

                            ground_truth = denormalize(
                                data['observation'].squeeze().cpu().detach())
                            pred_with_input = denormalize(
                                torch.cat(
                                    [data['observation'][:, :-val_pred_length, :, :, :].squeeze(),
                                     preds.squeeze()],
                                    dim=0
                                ).cpu().detach())

                            # Running losses
                            r_loss_valid += val_nll
                            r_loss_loc_valid += loss_loc
                            r_loss_scale_valid += loss_scale

                    # Average losses
                    valid_loss_avg = r_loss_valid / (len(valid_dataset) * time_length)
                    valid_loss_loc_avg = r_loss_loc_valid / \
                        (len(valid_dataset) * val_pred_length * width * height)
                    valid_loss_scale_avg = r_loss_scale_valid / \
                        (len(valid_dataset) * val_pred_length * width * height)
                    writer.add_scalar('Loss/test', valid_loss_avg, epoch)
                    writer.add_scalar('Loss/test_obs', valid_loss_loc_avg, epoch)
                    writer.add_scalar('Loss/test_scale', valid_loss_scale_avg, epoch)
                    logging.info("Validation loss: %1.5f", valid_loss_avg)
                    logging.info("Validation obs loss: %1.5f", valid_loss_loc_avg)
                    logging.info("Validation scale loss: %1.5f", valid_loss_scale_avg)
                    log_evaluation(evaluation_logpath,
                                   "Validation obs loss for {}s pred {}: {}\n".format(
                                       val_pred_length, args.trialnumber, valid_loss_loc_avg))
                    log_evaluation(evaluation_logpath,
                                   "Validation scale loss for {}s pred {}: {}\n".format(
                                       val_pred_length, args.trialnumber, valid_loss_scale_avg))

            # Save model
            if epoch % 50 == 0 or epoch == epochs - 1:
                torch.save(dmm.state_dict(), args.modelsavepath / model_name /
                           'e{}-i{}-tn{}.pt'.format(epoch, idx, args.trialnumber))
                adam.save(args.modelsavepath / model_name /
                          'e{}-i{}-opt-tn{}.opt'.format(epoch, idx, args.trialnumber))

    # Last validation after training
    test_samples_indices = range(100)
    total_n = 0
    if validation_only:
        r_loss_loc_valid = 0
        r_loss_scale_valid = 0
        r_loss_latent_valid = 0
        dmm.train(False)
        val_pred_length = args.validpredlength
        val_pred_input_length = 10
        with torch.no_grad():
            for i in tqdm(test_samples_indices, desc='Valid', leave=True):
                # Data processing
                data = valid_dataset[i]
                if torch.isnan(torch.sum(data['observation'])):
                    print("Skip {}".format(i))
                    continue
                else:
                    total_n += 1
                data['observation'] = normalize(
                    data['observation'].unsqueeze(0).unsqueeze(2).to(device))
                batch_size, length, _, w, h = data['observation'].shape
                data_reversed = reverse_sequences(data['observation'])
                data_mask = torch.ones(batch_size, length, input_channels, w, h).to(device)

                # Prediction
                pred_tensor_mask = torch.ones(
                    batch_size, input_length_for_pred, input_channels, w, h).to(device)
                preds, _, loss_loc, loss_scale = do_prediction_rep_inference(
                    dmm, pred_tensor_mask, val_pred_length,
                    val_pred_input_length, data['observation'])

                ground_truth = denormalize(data['observation'].squeeze().cpu().detach())
                pred_with_input = denormalize(
                    torch.cat(
                        [data['observation'][:, :-val_pred_length, :, :, :].squeeze(),
                         preds.squeeze()],
                        dim=0
                    ).cpu().detach())

                # Save samples
                if i < 5:
                    save_dir_samples = Path('./samples/more_variance_long')
                    with open(save_dir_samples / '{}-gt-test.pkl'.format(i), 'wb') as fout:
                        pickle.dump(ground_truth, fout)
                    with open(save_dir_samples / '{}-vanilladmm-pred-test.pkl'.format(i), 'wb') as fout:
                        pickle.dump(pred_with_input, fout)

                # Running losses
                r_loss_loc_valid += loss_loc
                r_loss_scale_valid += loss_scale
                r_loss_latent_valid += np.sum(
                    (preds.squeeze().detach().cpu().numpy()
                     - data['latent'][time_length - val_pred_length:, :, :].detach().cpu().numpy()) ** 2)

        # Average losses
        test_samples_indices = range(total_n)
        print(total_n)
        valid_loss_loc_avg = r_loss_loc_valid / (total_n * val_pred_length * width * height)
        valid_loss_scale_avg = r_loss_scale_valid / (total_n * val_pred_length * width * height)
        valid_loss_latent_avg = r_loss_latent_valid / (total_n * val_pred_length * width * height)
        logging.info("Validation obs loss for %ds pred VanillaDMM: %f",
                     val_pred_length, valid_loss_loc_avg)
        logging.info("Validation latent loss: %f", valid_loss_latent_avg)
        with open('VanillaDMMResult.log', 'a+') as fout:
            validation_log = 'Pred {}s VanillaDMM: {}\n'.format(
                val_pred_length, valid_loss_loc_avg)
            fout.write(validation_log)
def main(args): """ run inference for CVAE :param args: arguments for CVAE :return: None """ if args.seed is not None: set_seed(args.seed, args.cuda) if os.path.exists('cvae.model.pt'): print('Loading model %s' % 'cvae.model.pt') cvae = torch.load('cvae.model.pt') else: cvae = CVAE(z_dim=args.z_dim, y_dim=8, x_dim=32612, hidden_dim=args.hidden_dimension, use_cuda=args.cuda) print(cvae) # setup the optimizer adam_params = { "lr": args.learning_rate, "betas": (args.beta_1, 0.999), "clip_norm": 0.5 } optimizer = ClippedAdam(adam_params) guide = config_enumerate(cvae.guide, args.enum_discrete) # set up the loss for inference. loss = SVI(cvae.model, guide, optimizer, loss=TraceEnum_ELBO(max_iarange_nesting=1)) try: # setup the logger if a filename is provided logger = open(args.logfile, "w") if args.logfile else None data_loaders = setup_data_loaders(NHANES, args.cuda, args.batch_size) print(len(data_loaders['train'])) print(len(data_loaders['test'])) print(len(data_loaders['valid'])) # initializing local variables to maintain the best validation acc # seen across epochs over the supervised training set # and the corresponding testing set and the state of the networks best_valid_err, best_test_err = float('inf'), float('inf') # run inference for a certain number of epochs for i in range(0, args.num_epochs): # get the losses for an epoch epoch_losses = \ run_inference_for_epoch(args.batch_size, data_loaders, loss, args.cuda) # compute average epoch losses i.e. losses per example avg_epoch_losses = epoch_losses / NHANES.train_size # store the losses in the logfile str_loss = str(avg_epoch_losses) str_print = "{} epoch: avg loss {}".format(i, "{}".format(str_loss)) validation_err = get_accuracy(data_loaders["valid"], cvae.sim_measurements) str_print += " validation error {}".format(validation_err) # this test accuracy is only for logging, this is not used # to make any decisions during training test_еrr = get_accuracy(data_loaders["test"], cvae.sim_measurements) str_print += " test error {}".format(test_еrr) # update the best validation accuracy and the corresponding # testing accuracy and the state of the parent module (including the networks) if best_valid_err > validation_err: best_valid_err = validation_err if best_test_err > test_еrr: best_test_err = test_еrr print_and_log(logger, str_print) final_test_accuracy = get_accuracy(data_loaders["test"], cvae.sim_measurements) print_and_log( logger, "best validation error {} corresponding testing error {} " "last testing error {}".format(best_valid_err, best_test_err, final_test_accuracy)) torch.save(cvae, 'cvae.model.pt') #mu, sigma, actuals, lods, masks = get_predictions(data_loaders["prediction"], cvae.sim_measurements) #torch.save((mu, sigma, actuals, lods, masks), 'cvae.predictions.pt') finally: # close the logger file object if we opened it earlier if args.logfile: logger.close()
def main_test_mnist():
    from torchvision.datasets import MNIST
    from torchvision.transforms import Compose, ToTensor, ToPILImage, Normalize

    transform = Compose([ToTensor()])
    train_dataset = MNIST(root="/tmp", train=True, download=True, transform=transform)
    test_dataset = MNIST(root="/tmp", train=False, download=True, transform=transform)

    vae = VAE(x_dim=784, z_dim=50,
              device='cuda' if torch.cuda.is_available() else 'cpu')
    logger.info(f"\n{vae}")
    optimizer = ClippedAdam({"lr": 1e-3})
    svi = SVI(vae.model, vae.guide, optimizer, loss=Trace_ELBO())

    def _update(engine, batch):
        vae.train()
        x, y = batch
        loss = svi.step(x.view(-1, 784).to(vae.device, non_blocking=True))
        return loss / len(x), len(x)

    def _evaluate(engine, batch):
        vae.eval()
        x, y = batch
        elbo = svi.evaluate_loss(x.view(-1, 784).to(vae.device, non_blocking=True))
        return elbo / len(x), len(x)

    trainer = Engine(_update)
    evaluater = Engine(_evaluate)

    train_dataloader = DataLoader(train_dataset, batch_size=256, shuffle=True,
                                  pin_memory=True, drop_last=True, num_workers=8)
    test_dataloader = DataLoader(test_dataset, batch_size=256, shuffle=True,
                                 pin_memory=True, drop_last=True, num_workers=8)

    timer = Timer(average=True)
    timer.attach(engine=trainer,
                 start=Events.EPOCH_STARTED,
                 pause=Events.ITERATION_COMPLETED,
                 resume=Events.ITERATION_STARTED,
                 step=Events.ITERATION_COMPLETED)

    loss_metric = RunningAverage(output_transform=lambda outputs: -outputs[0], alpha=1)
    loss_metric.attach(engine=trainer, name="ELBO")
    loss_metric.attach(engine=evaluater, name="ELBO")

    vis = Visdom(server="gpu1.cluster.peidan.me", port=10697,
                 env='Imp-pyro--vae-MNIST')

    @trainer.on(Events.EPOCH_COMPLETED)
    def log_train_loss(engine):
        elbo = engine.state.metrics['ELBO']
        logger.info(f"epoch:{engine.state.epoch}, ELBO: {elbo:.2f}, "
                    f"step time: {timer.value():.3f}s")
        vis.line(Y=[elbo], X=[engine.state.epoch], win="Train-ELBO",
                 update='append', opts={"title": "Train-ELBO"})

    def plot_vae_samples(title):
        x = torch.zeros([1, 784]).to(vae.device)
        for i in range(10):
            images = []
            for rr in range(100):
                # get loc from the model
                sample_loc_i = vae.model(x)
                img = sample_loc_i[0].view(1, 28, 28).cpu().data.numpy()
                images.append(img)
            vis.images(images, 10, 2, win=title, opts={'title': title})

    @trainer.on(Events.EPOCH_COMPLETED)
    def generate_samples(engine):
        epoch = engine.state.epoch
        if epoch % 10 == 0:
            logger.info(f"epoch: {epoch}, plot samples")
            plot_vae_samples(f"epoch-{epoch}")

    @trainer.on(Events.EPOCH_COMPLETED)
    def validation(engine):
        epoch = engine.state.epoch
        if epoch % 5 == 0:
            evaluater.run(test_dataloader)
            elbo = evaluater.state.metrics['ELBO']
            logger.info(f"epoch: {epoch}, validation ELBO: {elbo}")
            vis.line(Y=[elbo], X=[engine.state.epoch], win="Validation-ELBO",
                     update='append', opts={'title': "Validation-ELBO"})

    trainer.run(train_dataloader, max_epochs=2500)
          iaf_dim=50, use_cuda=True)

learning_rate = 0.01
beta1 = 0.9
beta2 = 0.999
clip_norm = 10.0
lr_decay = 1.0
weight_decay = 0
adam_params = {
    "lr": learning_rate,
    "betas": (beta1, beta2),
    "clip_norm": clip_norm,
    "lrd": lr_decay,
    "weight_decay": weight_decay
}
adam = ClippedAdam(adam_params)
elbo = Trace_ELBO()
svi = SVI(dmm.model, dmm.guide, adam, loss=elbo)

for i in range(100):
    loss = svi.step(input_tensor, input_tensor_reversed, input_tensor_mask)
    val_nll = svi.evaluate_loss(input_tensor, input_tensor_reversed, input_tensor_mask)
    print(val_nll)

_, _, loss_loc, loss_scale = do_prediction(dmm, pred_tensor, pred_tensor_reversed,
                                           pred_tensor_mask, 5, ground_truth)
print(loss_loc, loss_scale)
def main(args):
    # setup logging
    log = get_logger(args.log)
    log(args)

    data = poly.load_data(poly.JSB_CHORALES)
    training_seq_lengths = data['train']['sequence_lengths']
    training_data_sequences = data['train']['sequences']
    test_seq_lengths = data['test']['sequence_lengths']
    test_data_sequences = data['test']['sequences']
    val_seq_lengths = data['valid']['sequence_lengths']
    val_data_sequences = data['valid']['sequences']

    #=== wy: to force batch_size be 20 during training, testing, and validation.
    if debug:
        print("===== after load_data =====")
        print("training_data_sequences:\t shape={}".format(training_data_sequences.size()))
        print("test_data_sequences:\t shape={}".format(test_data_sequences.size()))
        print("val_data_sequences:\t shape={}".format(val_data_sequences.size()))
        print("===== after load_data =====")

    d1_training = int(len(training_seq_lengths) / args.mini_batch_size) * args.mini_batch_size
    d1_test = args.mini_batch_size
    d1_val = args.mini_batch_size
    training_seq_lengths = training_seq_lengths[:d1_training]
    training_data_sequences = training_data_sequences[:d1_training]
    test_seq_lengths = test_seq_lengths[:d1_test]
    test_data_sequences = test_data_sequences[:d1_test]
    val_seq_lengths = val_seq_lengths[:d1_val]
    val_data_sequences = val_data_sequences[:d1_val]
    #=== wy

    N_train_data = len(training_seq_lengths)
    N_train_time_slices = float(torch.sum(training_seq_lengths))
    N_mini_batches = int(N_train_data / args.mini_batch_size +
                         int(N_train_data % args.mini_batch_size > 0))

    log("N_train_data: %d     avg. training seq. length: %.2f     N_mini_batches: %d" %
        (N_train_data, training_seq_lengths.float().mean(), N_mini_batches))

    # how often we do validation/test evaluation during training
    #=== wy
    # val_test_frequency = 50
    # WL: edited. =====
    # val_test_frequency = 1
    val_test_frequency = 0
    # =================
    #=== wy
    # the number of samples we use to do the evaluation
    n_eval_samples = 1

    # package repeated copies of val/test data for faster evaluation
    # (i.e. set us up for vectorization)
    def rep(x):
        rep_shape = torch.Size([x.size(0) * n_eval_samples]) + x.size()[1:]
        repeat_dims = [1] * len(x.size())
        repeat_dims[0] = n_eval_samples
        return x.repeat(repeat_dims).reshape(n_eval_samples, -1).transpose(1, 0).reshape(rep_shape)

    # get the validation/test data ready for the dmm: pack into sequences, etc.
    val_seq_lengths = rep(val_seq_lengths)
    test_seq_lengths = rep(test_seq_lengths)
    val_batch, val_batch_reversed, val_batch_mask, val_seq_lengths = poly.get_mini_batch(
        torch.arange(n_eval_samples * val_data_sequences.shape[0]),
        rep(val_data_sequences),
        val_seq_lengths, cuda=args.cuda)
    test_batch, test_batch_reversed, test_batch_mask, test_seq_lengths = poly.get_mini_batch(
        torch.arange(n_eval_samples * test_data_sequences.shape[0]),
        rep(test_data_sequences),
        test_seq_lengths, cuda=args.cuda)

    # instantiate the dmm
    dmm = DMM(rnn_dropout_rate=args.rnn_dropout_rate, num_iafs=args.num_iafs,
              iaf_dim=args.iaf_dim, use_cuda=args.cuda)

    # setup optimizer
    adam_params = {
        "lr": args.learning_rate,
        "betas": (args.beta1, args.beta2),
        "clip_norm": args.clip_norm,
        "lrd": args.lr_decay,
        "weight_decay": args.weight_decay
    }
    adam = ClippedAdam(adam_params)

    # setup inference algorithm
    elbo = JitTrace_ELBO() if args.jit else Trace_ELBO()
    svi = SVI(model, guide, adam, loss=elbo)

    # now we're going to define some functions we need to form the main training loop

    # saves the model and optimizer states to disk
    def save_checkpoint():
        log("saving model to %s..." % args.save_model)
        torch.save(dmm.state_dict(), args.save_model)
        log("saving optimizer states to %s..." % args.save_opt)
        adam.save(args.save_opt)
        log("done saving model and optimizer checkpoints to disk.")

    # loads the model and optimizer states from disk
    def load_checkpoint():
        assert exists(args.load_opt) and exists(args.load_model), \
            "--load-model and/or --load-opt misspecified"
        log("loading model from %s..." % args.load_model)
        dmm.load_state_dict(torch.load(args.load_model))
        log("loading optimizer states from %s..." % args.load_opt)
        adam.load(args.load_opt)
        log("done loading model and optimizer states.")

    # prepare a mini-batch and take a gradient step to minimize -elbo
    def process_minibatch(epoch, which_mini_batch, shuffled_indices):
        if args.annealing_epochs > 0 and epoch < args.annealing_epochs:
            # compute the KL annealing factor appropriate for the current mini-batch in the current epoch
            min_af = args.minimum_annealing_factor
            annealing_factor = min_af + (1.0 - min_af) * \
                (float(which_mini_batch + epoch * N_mini_batches + 1) /
                 float(args.annealing_epochs * N_mini_batches))
        else:
            # by default the KL annealing factor is unity
            annealing_factor = 1.0

        # compute which sequences in the training set we should grab
        mini_batch_start = (which_mini_batch * args.mini_batch_size)
        mini_batch_end = np.min([(which_mini_batch + 1) * args.mini_batch_size, N_train_data])
        mini_batch_indices = shuffled_indices[mini_batch_start:mini_batch_end]

        # grab a fully prepped mini-batch using the helper function in the data loader
        if debug:
            print("===== process_minibatch:S =====")
        mini_batch, mini_batch_reversed, mini_batch_mask, mini_batch_seq_lengths \
            = poly.get_mini_batch(mini_batch_indices, training_data_sequences,
                                  training_seq_lengths, cuda=args.cuda)
        if debug:
            print("===== process_minibatch:E =====")

        # do an actual gradient step
        loss = svi.step(mini_batch, mini_batch_reversed, mini_batch_mask,
                        mini_batch_seq_lengths, annealing_factor)
        # keep track of the training loss
        return loss

    # helper function for doing evaluation
    def do_evaluation():
        # put the RNN into evaluation mode (i.e. turn off drop-out if applicable)
        rnn.eval()
        # compute the validation and test loss n_samples many times
        val_nll = svi.evaluate_loss(
            val_batch, val_batch_reversed, val_batch_mask,
            val_seq_lengths) / torch.sum(val_seq_lengths)
        test_nll = svi.evaluate_loss(
            test_batch, test_batch_reversed, test_batch_mask,
            test_seq_lengths) / torch.sum(test_seq_lengths)
        # put the RNN back into training mode (i.e. turn on drop-out if applicable)
        rnn.train()
        return val_nll, test_nll

    # if checkpoint files provided, load model and optimizer states from disk before we start training
    if args.load_opt != '' and args.load_model != '':
        load_checkpoint()

    #################
    # TRAINING LOOP #
    #################
    times = [time.time()]
    # WL: added. =====
    log("\nepoch\t" + "elbo\t" + "time(sec)")
    # ================
    for epoch in range(args.num_epochs):
        # if specified, save model and optimizer states to disk every checkpoint_freq epochs
        if args.checkpoint_freq > 0 and epoch > 0 and epoch % args.checkpoint_freq == 0:
            save_checkpoint()

        # accumulator for our estimate of the negative log likelihood (or rather -elbo) for this epoch
        epoch_nll = 0.0
        # prepare mini-batch subsampling indices for this epoch
        shuffled_indices = torch.randperm(N_train_data)

        # process each mini-batch; this is where we take gradient steps
        for which_mini_batch in range(N_mini_batches):
            epoch_nll += process_minibatch(epoch, which_mini_batch, shuffled_indices)

        # report training diagnostics
        times.append(time.time())
        epoch_time = times[-1] - times[-2]
        # WL: edited. =====
        # log("[training epoch %04d]  %.4f \t\t\t\t(dt = %.3f sec)" %
        #     (epoch, epoch_nll / N_train_time_slices, epoch_time))
        log(f"{epoch:06d}\t"
            f"{-epoch_nll / N_train_time_slices:.4f}\t"
            f"{epoch_time:.3f}")
        # =================

        # do evaluation on test and validation data and report results
        if val_test_frequency > 0 and epoch > 0 and epoch % val_test_frequency == 0:
            val_nll, test_nll = do_evaluation()
            log("[val/test epoch %04d]  %.4f  %.4f" % (epoch, val_nll, test_nll))
def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]:
    if self._training_inputs is None:
        raise Exception('Cannot fit when no training data is present.')

    if self._fitted:
        return base.CallResult(None)

    # Extract curated data into X and Y's
    X_train = self._training_inputs[['unique_id', 'ds']]
    X_train['x'] = '1'
    y_train = self._training_inputs[['unique_id', 'ds', 'y']]
    y_train['y'] += self._constant  # To remove missing values

    if timeout is None:
        timeout = np.inf
    if iterations is None:
        _iterations = self._num_iterations
    else:
        _iterations = iterations

    # Dataloader
    training_set = Dataset(config=self.hyperparams, X=X_train, y=y_train,
                           min_series_length=self.hyperparams['seq_length'])

    # If the length is less than hyperparams, defaults to the minimum in dataset
    self._seq_length = min(self.hyperparams['seq_length'],
                           training_set.min_series_length)

    # Dataset Parameters
    train_params = {'batch_size': self._batch_size, 'shuffle': True}

    # Data Generators
    training_generator = data.DataLoader(training_set, **train_params)

    # Setup Model
    self._obs_dim = training_set.n_series
    self._net = self._create_dmm()
    adam = ClippedAdam(self._adam_params)
    self._optimizer = SVI(self._net.model, self._net.guide, adam, "ELBO")

    # Train functions
    self._iterations_done = 0
    self._has_finished = False

    # Set model to training
    self._net.train()

    for epoch in range(_iterations):
        epoch_nll = 0.0
        iteration_count = 0
        for local_batch, local_labels in training_generator:
            _local_training_batch = torch.cat((local_batch, local_labels), axis=2)
            mini_batch, mini_batch_reversed = self._reverse_sequences(
                mini_batch=_local_training_batch)

            # Loss for minibatch and minibatch reversed
            loss = self._optimizer.step(mini_batch, mini_batch_reversed)
            iteration_count += 1
            epoch_nll += loss

            # Break on numerical problems
            if np.isnan(loss):
                print("minibatch nan-ed out!")
                break
            if np.isinf(loss):
                print("minibatch inf-ed out!")
                break

        print("[training epoch %04d] %.4f " % (epoch, epoch_nll / iteration_count))

    self._fitted = True

    return CallResult(None)
def main(args):
    pyro.enable_validation(is_validate=True)

    dataset = BitextDataSet(args.L1, args.L2, sentence_limit=5)
    dataloader = DataLoader(dataset, batch_size=64,
                            collate_fn=custom_collator, shuffle=True)

    # pyro fun, all of it is still broken :P
    #vnmt = VNMT(dataset.getSrcWord2Index(), dataset.getTgtWord2Index(), use_cuda=True)
    #pyroseq2seq = Seq2Seq(dataset.getSrcWord2Index(), dataset.getTgtWord2Index(), use_cuda=True)
    #rnnsearch = RNNSearch(dataset.getSrcWord2Index(), dataset.getTgtWord2Index(), use_cuda=True)

    # classical approach
    #seq2seq = Seq2Seq(dataset.getSrcWord2Index(), dataset.getTgtWord2Index(), use_cuda=True)
    model = make_model(dataset.getSrcWord2Index(), dataset.getTgtWord2Index())
    #print(vnmt)
    #vnmt = vnmt.cuda()

    # setup optimizer
    adam_params = {"lr": 0.003,
                   #"betas": (args.beta1, args.beta2),
                   "clip_norm": 20.0,
                   "lrd": .99996,
                   "weight_decay": 0.0}
    adam = ClippedAdam(adam_params)

    # setup inference algorithm
    elbo = JitTrace_ELBO() if False else Trace_ELBO()

    pad_indx = model.trg_embed.getWord2Index('<PAD>')
    optim = torch.optim.Adam(model.parameters(), lr=1e-3)
    loss_compute = SimpleLossCompute(model.generator,
                                     nn.NLLLoss(reduction="sum", ignore_index=pad_indx),
                                     opt=optim)

    #svi = SVI(vnmt.model, vnmt.guide, adam, loss=elbo)
    #svi = SVI(seq2seq.model, seq2seq.guide, adam, loss=elbo)
    #svi = SVI(rnnsearch.model, rnnsearch.guide, adam, loss=elbo)

    # Classical pytorch approach
    epochs = 2
    print(len(dataloader))
    print(len(dataset))
    #seq2seq.train()
    for e in range(0, epochs):
        tot_loss = 0.0
        run_epoch(dataloader, model, loss_compute)
        #for i, b in enumerate(dataloader):
            #print(b)
            #l = svi.step(b[0], b[1])
            #optim.zero_grad()
            #loss = seq2seq(b[0], b[1])
            #loss.backward()
            #optim.step()
            #tot_loss += loss.item() * len(b[0])
            #print('batch {}, loss {}'.format(i, l))
            #loss += l
        print(tot_loss / len(dataset))

    to_translate = [dataset[i][0] for i in range(2)]
    print('Original Sentences')
    for sent in to_translate:
        print(sent)

    print('Greedy Decoding Translation')
    greedy_trans = GreedyDecodingTranslation(model, to_translate)
    for sent in greedy_trans:
        print(' '.join([model.y_embed.getIndex2Word(s) for s in sent]))

    print('Simple Greedy Decoding Translation')
    greedy_trans = SimpleGreedyDecodingTranslation(model, to_translate)
    for sent in greedy_trans:
        print(' '.join([model.y_embed.getIndex2Word(s) for s in sent]))

    #print('Presumably Beam Search Decoding Translation')
    #beam_trans = BeamDecodingTranslation(seq2seq, to_translate, 3)
    #for sent in beam_trans:
    #    print(' '.join([seq2seq.y_embed.getIndex2Word(s) for s in sent]))

    model = None
    optimizer = None
    loss = None
def main(args):
    # setup logging
    log = get_logger(args.log)
    log(args)

    root_dir = r'D:\projects\trading\mlbootcamp\tickers'
    series_length = 60
    lookback = 50  # was 160
    input_dim = 1
    train_start_date = datetime.date(2010, 1, 1)
    train_end_date = datetime.date(2016, 1, 1)
    test_start_date = train_end_date
    test_end_date = datetime.date(2019, 1, 1)
    min_sequence_length_train = 2 * (series_length + lookback)
    min_sequence_length_test = 2 * (series_length + lookback)

    dataset_train = create_ticker_dataset(root_dir, series_length, lookback, min_sequence_length_train,
                                          start_date=train_start_date, end_date=train_end_date)
    dataset_test = create_ticker_dataset(root_dir, series_length, lookback, min_sequence_length_test,
                                         start_date=test_start_date, end_date=test_end_date)
    dataloader_train = DataLoader(dataset_train, batch_size=args.mini_batch_size, shuffle=True,
                                  num_workers=0, drop_last=True)
    dataloader_test = DataLoader(dataset_test, batch_size=args.mini_batch_size, shuffle=False,
                                 num_workers=0, drop_last=True)

    N_train_data = len(dataset_train)
    N_test_data = len(dataset_test)
    N_mini_batches = N_train_data // args.mini_batch_size
    N_train_time_slices = args.mini_batch_size * N_mini_batches

    print(f'N_train_data: {N_train_data}, N_test_data: {N_test_data}')

    # how often we do validation/test evaluation during training
    val_test_frequency = 50
    # the number of samples we use to do the evaluation
    n_eval_samples = 1

    # instantiate the dmm
    dmm = DMM(input_dim=input_dim, rnn_dropout_rate=args.rnn_dropout_rate,
              num_iafs=args.num_iafs, iaf_dim=args.iaf_dim, use_cuda=args.cuda)

    # setup optimizer
    adam_params = {
        "lr": args.learning_rate,
        "betas": (args.beta1, args.beta2),
        "clip_norm": args.clip_norm,
        "lrd": args.lr_decay,
        "weight_decay": args.weight_decay
    }
    adam = ClippedAdam(adam_params)

    # setup inference algorithm
    elbo = JitTrace_ELBO() if args.jit else Trace_ELBO()
    svi = SVI(dmm.model, dmm.guide, adam, loss=elbo)

    # if checkpoint files provided, load model and optimizer states from disk before we start training
    if args.load_opt != '' and args.load_model != '':
        load_checkpoint(dmm, adam, log)

    #################
    # TRAINING LOOP #
    #################
    times = [time.time()]
    for epoch in range(args.num_epochs):
        print(f'Starting epoch {epoch}.')
        # accumulator for our estimate of the negative log likelihood (or rather -elbo) for this epoch
        epoch_nll = 0.0
        # prepare mini-batch subsampling indices for this epoch
        shuffled_indices = torch.randperm(N_train_data)

        # process each mini-batch; this is where we take gradient steps.
        # Iterate the DataLoader directly: the previous next(iter(...)) call
        # rebuilt the iterator every step, resampling batches with replacement
        # instead of sweeping the epoch.
        dmm.train()
        for which_mini_batch, mini_batch in enumerate(dataloader_train):
            print(f'Epoch {epoch} of {args.num_epochs}, Batch {which_mini_batch} of {N_mini_batches}.')
            epoch_nll += process_minibatch(svi, epoch, mini_batch, N_mini_batches,
                                           which_mini_batch, shuffled_indices)

        # save model and optimizer states to disk every epoch
        # (the args.checkpoint_freq check is currently disabled)
        save_checkpoint(dmm, adam, log)

        # report training diagnostics
        times.append(time.time())
        epoch_time = times[-1] - times[-2]
        log("[training epoch %04d] %.4f \t\t\t\t(dt = %.3f sec)"
            % (epoch, epoch_nll / N_train_time_slices, epoch_time))

        # do evaluation on test and validation data and report results
        if val_test_frequency > 0 and epoch > 0 and epoch % val_test_frequency == 0:
            val_nll, test_nll = do_evaluation()
            log("[val/test epoch %04d] %.4f  %.4f" % (epoch, val_nll, test_nll))

        # Testing.
print(f"Testing epoch {epoch}.") dmm.eval() mini_batch = next(iter(dataloader_test)) fig = test_minibatch(dmm, mini_batch, args) fig.savefig(f'test_batch_{epoch}.png') plt.close(fig) # if 1: # fig, _, _ = run_tsne(dmm, dataloader_test) # fig.savefig(f'tsne_{epoch}.png') # plt.close(fig) print("Testing") if 1: dmm.eval() mini_batch = next(iter(dataloader_test)) x, z, x_reconst = test_minibatch(dmm, mini_batch, args) n_plots = 5 fig, axes = plt.subplots(nrows=n_plots, ncols=1) for i in range(n_plots): input = x[i, :].numpy().squeeze() output = x_reconst[i, :] axes[i].plot(range(input.shape[0]), input) axes[i].plot(range(len(output)), output) axes[i].grid() fig.savefig(f'test_batch.png') plt.close(fig) if 1: # t-SNE. all_z_latents = [] for test_batch in dataloader_test: # z_latents = minibatch_inference(dmm, test_batch) # z_latents = encode_x_to_z(dmm, test_batch, sample_z_t=False) x, z, x_reconst = test_minibatch(dmm, test_batch, args, sample_z=True) all_z_latents.append(z[:, x.shape[1] - 1, :]) # all_latents = torch.cat(all_z_latents, dim=0) all_latents = np.concatenate(all_z_latents, axis=0) # Run t-SNE with 2 output dimensions. from sklearn.manifold import TSNE model_tsne = TSNE(n_components=2, random_state=0) # z_states = all_latents.detach().cpu().numpy() z_states = all_latents z_embed = model_tsne.fit_transform(z_states) # Plot t-SNE embedding. fig = plt.figure() plt.scatter(z_embed[:, 0], z_embed[:, 1], s=10) fig.savefig(f'tsne_test.png') plt.close(fig) return # Show some samples surrounding a given point. mini_batch = next(iter(dataloader_test)) # Use the inference network to determine the parameters of the latents. z_loc, z_scale = minibatch_latent_parameters(dmm, mini_batch) z = sample_latent_sequence(dmm, z_loc, z_scale) most_recent_latents = latents[:, -1, :] # Take n_random_samples = 9 print('Finished')
def main(args):
    # setup logging
    log = get_logger(args.log)
    log(args)

    jsb_file_loc = "./data/jsb_processed.pkl"
    # ingest training/validation/test data from disk
    data = pickle.load(open(jsb_file_loc, "rb"))
    training_seq_lengths = data['train']['sequence_lengths']
    training_data_sequences = data['train']['sequences']
    test_seq_lengths = data['test']['sequence_lengths']
    test_data_sequences = data['test']['sequences']
    val_seq_lengths = data['valid']['sequence_lengths']
    val_data_sequences = data['valid']['sequences']
    N_train_data = len(training_seq_lengths)
    N_train_time_slices = float(np.sum(training_seq_lengths))
    N_mini_batches = int(N_train_data / args.mini_batch_size +
                         int(N_train_data % args.mini_batch_size > 0))

    log("N_train_data: %d     avg. training seq. length: %.2f     N_mini_batches: %d" %
        (N_train_data, np.mean(training_seq_lengths), N_mini_batches))

    # how often we do validation/test evaluation during training
    val_test_frequency = 50
    # the number of samples we use to do the evaluation
    n_eval_samples = 1

    # package repeated copies of val/test data for faster evaluation
    # (i.e. set us up for vectorization)
    def rep(x):
        y = np.repeat(x, n_eval_samples, axis=0)
        return y

    # get the validation/test data ready for the dmm: pack into sequences, etc.
    val_seq_lengths = rep(val_seq_lengths)
    test_seq_lengths = rep(test_seq_lengths)
    val_batch, val_batch_reversed, val_batch_mask, val_seq_lengths = poly.get_mini_batch(
        np.arange(n_eval_samples * val_data_sequences.shape[0]), rep(val_data_sequences),
        val_seq_lengths, cuda=args.cuda)
    test_batch, test_batch_reversed, test_batch_mask, test_seq_lengths = poly.get_mini_batch(
        np.arange(n_eval_samples * test_data_sequences.shape[0]), rep(test_data_sequences),
        test_seq_lengths, cuda=args.cuda)

    # instantiate the dmm
    dmm = DMM(rnn_dropout_rate=args.rnn_dropout_rate, num_iafs=args.num_iafs,
              iaf_dim=args.iaf_dim, use_cuda=args.cuda)

    # setup optimizer
    adam_params = {
        "lr": args.learning_rate,
        "betas": (args.beta1, args.beta2),
        "clip_norm": args.clip_norm,
        "lrd": args.lr_decay,
        "weight_decay": args.weight_decay
    }
    adam = ClippedAdam(adam_params)

    # setup inference algorithm (renamed from `elbo` to `svi`: this is the SVI
    # object, not the loss itself)
    svi = SVI(dmm.model, dmm.guide, adam, Trace_ELBO())

    # now we're going to define some functions we need to form the main training loop

    # saves the model and optimizer states to disk
    def save_checkpoint():
        model_path = os.path.join('.', 'checkpoints', 'dmm_model.pth')
        opt_path = os.path.join('.', 'checkpoints', 'dmm_opt.pth')
        log("saving model to %s..." % model_path)
        torch.save(dmm.state_dict(), model_path)
        log("saving optimizer states to %s..." % opt_path)
        adam.save(opt_path)
        log("done saving model and optimizer checkpoints to disk.")

    # loads the model and optimizer states from disk
    def load_checkpoint():
        assert exists(args.load_opt) and exists(args.load_model), \
            "--load-model and/or --load-opt misspecified"
        log("loading model from %s..." % args.load_model)
        # load from the paths the assert just checked, rather than hard-coded ones
        dmm.load_state_dict(torch.load(args.load_model))
        log("loading optimizer states from %s..." % args.load_opt)
        adam.load(args.load_opt)
        log("done loading model and optimizer states.")

    # prepare a mini-batch and take a gradient step to minimize -elbo
    def process_minibatch(epoch, which_mini_batch, shuffled_indices):
        if args.annealing_epochs > 0 and epoch < args.annealing_epochs:
            # compute the KL annealing factor appropriate for the current mini-batch in the current epoch
            min_af = args.minimum_annealing_factor
            annealing_factor = min_af + (1.0 - min_af) * \
                (float(which_mini_batch + epoch * N_mini_batches + 1) /
                 float(args.annealing_epochs * N_mini_batches))
        else:
            # by default the KL annealing factor is unity
            annealing_factor = 1.0

        # compute which sequences in the training set we should grab
        mini_batch_start = (which_mini_batch * args.mini_batch_size)
        mini_batch_end = np.min([(which_mini_batch + 1) * args.mini_batch_size, N_train_data])
        mini_batch_indices = shuffled_indices[mini_batch_start:mini_batch_end]
        # grab a fully prepped mini-batch using the helper function in the data loader
        mini_batch, mini_batch_reversed, mini_batch_mask, mini_batch_seq_lengths \
            = poly.get_mini_batch(mini_batch_indices, training_data_sequences,
                                  training_seq_lengths, cuda=args.cuda)
        # do an actual gradient step
        loss = svi.step(mini_batch, mini_batch_reversed, mini_batch_mask,
                        mini_batch_seq_lengths, annealing_factor)
        # keep track of the training loss
        return loss

    # helper function for doing evaluation
    def do_evaluation():
        # put the RNN into evaluation mode (i.e. turn off drop-out if applicable)
        dmm.rnn.eval()

        # compute the validation and test loss n_samples many times
        val_nll = svi.evaluate_loss(val_batch, val_batch_reversed, val_batch_mask,
                                    val_seq_lengths) / np.sum(val_seq_lengths)
        test_nll = svi.evaluate_loss(test_batch, test_batch_reversed, test_batch_mask,
                                     test_seq_lengths) / np.sum(test_seq_lengths)

        # put the RNN back into training mode (i.e. turn on drop-out if applicable)
        dmm.rnn.train()
        return val_nll, test_nll

    # if checkpoint files provided, load model and optimizer states from disk before we start training
    if args.load_opt != '' and args.load_model != '':
        load_checkpoint()

    #################
    # TRAINING LOOP #
    #################
    desc = "Epoch %4d | Loss %.4f | CUDA enabled" if args.cuda else "Epoch %4d | Loss %.4f"
    pbar = trange(args.num_epochs, desc=desc % (0, 0.0), unit="epoch")
    for epoch in pbar:
        # if specified, save model and optimizer states to disk every checkpoint_freq epochs
        if args.checkpoint_freq > 0 and epoch > 0 and epoch % args.checkpoint_freq == 0:
            save_checkpoint()

        # accumulator for our estimate of the negative log likelihood (or rather -elbo) for this epoch
        epoch_nll = 0.0
        # prepare mini-batch subsampling indices for this epoch
        shuffled_indices = np.arange(N_train_data)
        np.random.shuffle(shuffled_indices)

        # process each mini-batch; this is where we take gradient steps
        for which_mini_batch in range(N_mini_batches):
            epoch_nll += process_minibatch(epoch, which_mini_batch, shuffled_indices)

        # report training diagnostics
        pbar.set_description(desc % (epoch, epoch_nll / N_train_time_slices))
        if np.isnan(epoch_nll):
            print("Gradient exploded. Exiting program.")
            quit()

        # do evaluation on test and validation data and report results
        if val_test_frequency > 0 and epoch > 0 and epoch % val_test_frequency == 0:
            val_nll, test_nll = do_evaluation()
            pbar.write("Epoch %04d | Validation Loss %.4f, Test Loss %.4f" %
                       (epoch, val_nll, test_nll))
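# The `annealing_factor` computed above is forwarded through `svi.step` into
# the model and guide. A minimal sketch of how it is typically consumed there
# (following the standard Pyro DMM pattern; `model_step_sketch` and `trans`
# are hypothetical names, not from the source): the KL-bearing latent sample
# statements are wrapped in poutine.scale so their ELBO contribution is
# down-weighted early in training.
def model_step_sketch(z_prev, trans, annealing_factor=1.0):
    z_loc, z_scale = trans(z_prev)
    with pyro.poutine.scale(scale=annealing_factor):
        # the log-prob of this sample site is multiplied by annealing_factor
        z_t = pyro.sample("z", pyro.distributions.Normal(z_loc, z_scale).to_event(1))
    return z_t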
    # load weights if available
    prev_epoch = 0
    if args.weights is not None:
        try:
            print("Loading trained weights: {}".format(args.weights))
            states = torch.load(args.weights)
            vae.load_state_dict(states['state_dict'])
            prev_epoch = int(states['epoch'])
        except IOError:
            print("File not found: {}".format(args.weights))
            sys.exit(-1)

    # optimizer
    adam_params = {'lr': 1e-6, 'clip_norm': 10, 'weight_decay': opts['w_decay']}
    optimizer = ClippedAdam(adam_params)

    # inference (the string form loss='ELBO' is a pre-1.0 Pyro API; current
    # releases expect a loss instance)
    svi = SVI(vae.model, vae.guide, optimizer, loss=Trace_ELBO())

    # loss
    train_elbo = [None] * args.epochs
    test_elbo = [None] * args.epochs

    # training info
    print("... Training VAE with {} days".format(args.dim))
    with open('./models/vae_log.txt', 'a') as f:
        f.write('=== New run ===\n')

    # training loop
    best_lb = -np.inf
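    # The training loop itself is cut off above. A minimal sketch of how it
    # might continue, assuming `train_loader`/`test_loader` DataLoaders of
    # single-tensor batches (hypothetical names) and that `svi.step(x)` /
    # `svi.evaluate_loss(x)` return the negative ELBO summed over the batch;
    # the checkpoint fields mirror those read back at the top of this snippet.
    for epoch in range(prev_epoch, args.epochs):
        epoch_loss = 0.0
        for (x,) in train_loader:
            epoch_loss += svi.step(x)
        train_elbo[epoch] = -epoch_loss / len(train_loader.dataset)

        test_loss = sum(svi.evaluate_loss(x) for (x,) in test_loader)
        test_elbo[epoch] = -test_loss / len(test_loader.dataset)

        # keep the checkpoint with the best (highest) test ELBO
        if test_elbo[epoch] > best_lb:
            best_lb = test_elbo[epoch]
            torch.save({'state_dict': vae.state_dict(), 'epoch': epoch},
                       './models/vae_best.pth')
        print("epoch {: >4d} train elbo = {:0.5g}, test elbo = {:0.5g}".format(
            epoch, train_elbo[epoch], test_elbo[epoch]))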