def anneal_ringer(code):
    # Relies on module-level helpers (fitness, mutate_bd, mutate_site,
    # sample_species, anneal, log) and the constant n.
    f = lambda x: -log(fitness(code, x))
    mu = 0.1
    # Proposal: mutate the binding domain and replace the sites with
    # n copies of a single mutated version of the first site.
    def prop(state):
        bd, sites = state
        return (mutate_bd(bd, mu), [mutate_site(sites[0], mu)] * n)
    x0 = sample_species()
    ring = anneal(f, prop, x0, return_trajectory=False, k=0.001)
    return ring
def train(self, loader, epoch, args):
    """Train for a single epoch using batched gradient descent."""
    model, optimizer = self.model, self.optimizer
    model.train()
    data_num, loss = 0, 0.0
    rec_mults = dict(args.rec_mults)
    # Iterate over batches
    for b_num, (targets, mask, lengths, _) in enumerate(loader):
        # Anneal KLD loss multipliers
        b_tot = b_num + epoch * len(loader)
        kld_mult = anneal(0.0, args.kld_mult, b_tot,
                          args.kld_anneal * len(loader))
        # Send to device
        mask = mask.to(args.device)
        for m in targets.keys():
            targets[m] = targets[m].to(args.device)
        # Introduce burst deletions to improve interpolation
        inputs = mseq.burst_delete(targets, args.burst_frac, lengths)
        # Compute batch loss
        b_loss = model.step(inputs, mask, kld_mult, rec_mults,
                            targets=targets, lengths=lengths,
                            **args.train_args)
        loss += b_loss
        # Average over number of datapoints before stepping
        b_loss /= sum(lengths)
        b_loss.backward()
        # Plot gradients
        if args.gradients:
            plot_grad_flow(model.named_parameters())
        # Gradient clipping
        if args.clip_grad is not None and args.clip_grad > 0:
            clip_grad_norm_(model.parameters(), args.clip_grad)
        # Step, then zero gradients
        optimizer.step()
        optimizer.zero_grad()
        # Keep track of total number of time-points
        data_num += sum(lengths)
        print('Batch: {:5d}\tLoss: {:10.1f}'.format(b_num, loss / data_num))
    # Average losses and print
    loss /= data_num
    print('---')
    print('Epoch: {}\tLoss: {:10.1f}\tKLD-Mult: {:0.3f}'.format(
        epoch, loss, kld_mult))
    return loss
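# train() above ramps the KLD weight with anneal(0.0, args.kld_mult, b_tot,
# args.kld_anneal * len(loader)), i.e. from 0 up to args.kld_mult over the first
# args.kld_anneal epochs. That helper is not shown here; the sketch below is a
# minimal schedule with the same argument order, under the assumption that the
# ramp is linear and then held constant (linear_anneal is a hypothetical name,
# not the project's actual implementation).

def linear_anneal(start, end, step, nsteps):
    """Linearly interpolate from start to end over nsteps steps, then hold at end."""
    if nsteps <= 0 or step >= nsteps:
        return end
    return start + (end - start) * (step / float(nsteps))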
def mr_system_sa(alphas, init_system=None, G=100000.0, n=16, L=10,
                 sse_epsilon=0.0001, proposal=propose, scale=1000,
                 iterations=10000, return_trajectory=False):
    if init_system is None:
        matrix = [[0, 0, 0, 0] for i in range(L)]
        motif = [random_site(L) for i in range(n)]
    else:
        matrix, motif = init_system
    # Scale the SSE objective so the stopping criterion works on the same scale.
    scaled_sse = lambda state: sse(*state, alphas, G, n) * scale
    return anneal(scaled_sse,
                  lambda state: proposal(*state),
                  (matrix, motif),
                  iterations=iterations,
                  stopping_crit=sse_epsilon * scale,
                  return_trajectory=return_trajectory)
def train_(train_dataloader, log_train, steps, model, optimizer, ix2w, epoch, args):
    lossD = get_loss_D()
    # For each batch
    for i, (xx, x_lens, ey, ye, y_lens) in enumerate(train_dataloader):
        steps += 1
        # Word dropout is applied only during training.
        ey = utils.drop_words(ey, y_lens, args.word_dropout)
        x_recon, z, q_mu, q_logvar = model(xx, x_lens, ey, y_lens)
        bce, kld = model.loss_fn(ye, y_lens, x_recon, q_mu, q_logvar)
        loss = bce
        if args.framework == "vae":
            lossD['kld'].append(kld.item())
            kl_weight = utils.anneal(steps, args.kl_anneal_steps)
            loss += (kld * kl_weight)
        if args.bow:
            bow_loss = model.loss_bow(ye, z)
            loss += bow_loss
            lossD['bow'].append(bow_loss.item())
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        lossD['running'].append(loss.item())
        lossD['bce'].append(bce.item())
        #lossD['ppl'].append(np.exp(bce.item()))
    out = print_loss(log_train, lossD)
    print("--TRAIN--" + out)
    #### Reconstructions
    model.eval()
    input0 = model.embedding(torch.LongTensor([0]).to(device))
    z = torch.randn(5, args.z_dim)
    vis.sample_reconstruct(log_sample, epoch, model, input0, z, ix2w)
    vis.input_reconstruct(log_sample, model, x_lens[0:5], xx[0:5], input0, ix2w, device)
    vis.train_reconstruct(log_sample, x_lens[0:5], xx[0:5], x_recon[0:5], ix2w)
    ####
    return steps
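# train_() above regularizes the decoder with utils.drop_words(ey, y_lens,
# args.word_dropout). That helper is not reproduced in this section; the sketch below
# shows one common form of word dropout, under the assumptions that ey holds token
# indices and that dropped tokens are replaced by an <unk> index (drop_words and
# UNK_IDX here are illustrative names, not the project's actual implementation).

import torch

UNK_IDX = 1  # assumed index of the <unk> token

def drop_words(tokens, lengths, p):
    """Replace each non-padding token with <unk> with probability p."""
    if p <= 0:
        return tokens
    dropped = tokens.clone()
    for row, length in enumerate(lengths):
        length = int(length)
        mask = torch.rand(length) < p
        dropped[row, :length][mask] = UNK_IDX
    return dropped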
def mr_system(alphas, init_system=None, G=100000.0, n=16, L=10,
              sse_epsilon=0.00000001, use_annealing=True, scale=1000,
              iterations=10000, motif_prob=0.5, verbose=False):
    proposal = lambda matrix, motif: propose(matrix, motif, motif_prob=motif_prob)
    if init_system is None:
        matrix = [[0, 0, 0, 0] for i in range(L)]
        motif = [random_site(L) for i in range(n)]
    else:
        matrix, motif = init_system
    if use_annealing:
        # Minimize the scaled SSE by simulated annealing.
        scaled_sse = lambda state: sse(*state, alphas, G, n) * scale
        return anneal(scaled_sse,
                      lambda state: proposal(*state),
                      (matrix, motif),
                      iterations=iterations,
                      stopping_crit=sse_epsilon * scale,
                      verbose=verbose)
    else:
        # Sample with Metropolis-Hastings, using exp(-scale * SSE) as the target density.
        scaled_sse = lambda state: exp(sse(*state, alphas, G, n) * -scale)
        return mh(scaled_sse,
                  lambda state: proposal(*state),
                  (matrix, motif),
                  iterations=iterations,
                  every=100, verbose=True)
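# anneal_ringer, mr_system_sa, and mr_system above all minimize an energy function
# through a shared anneal(f, proposal, x0, ...) helper that is not reproduced in this
# section. The sketch below is a minimal simulated-annealing loop consistent with the
# keyword arguments seen in those calls; the exponential cooling schedule, the
# Metropolis acceptance rule, and the verbose/trajectory handling are assumptions
# rather than the project's actual implementation.

import random
from math import exp

def anneal(f, proposal, x0, iterations=10000, stopping_crit=None,
           k=0.001, return_trajectory=False, verbose=False):
    """Minimize f by simulated annealing, starting from state x0."""
    x, fx = x0, f(x0)
    trajectory = [(x, fx)]
    for i in range(iterations):
        temp = exp(-k * i)  # assumed exponential cooling schedule
        y = proposal(x)
        fy = f(y)
        # Always accept downhill moves; accept uphill moves with Boltzmann probability.
        if fy < fx or random.random() < exp(min(0.0, (fx - fy) / temp)):
            x, fx = y, fy
        if return_trajectory:
            trajectory.append((x, fx))
        if verbose and i % 1000 == 0:
            print(i, fx)
        if stopping_crit is not None and fx < stopping_crit:
            break
    return (x, trajectory) if return_trajectory else x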
def train_vae(cfgv, model, dataset):
    print('Training base vae ...')
    trainer = optim.Adam(model.vae_params(), lr=cfgv.lr)
    for it in tqdm(range(cfgv.s_iter, cfgv.s_iter + cfgv.n_iter + 1), disable=None):
        if it % cfgv.cheaplog_every == 0 or it % cfgv.expsvlog_every == 0:
            def tblog(k, v):
                log_value('train_' + k, v, it)
        else:
            tblog = lambda k, v: None
        inputs = dataset.next_batch('train_vae')
        beta = utils.anneal(cfgv.beta, it)
        (z_mu, z_logvar), (z, c), dec_logits = model(inputs.text, q_c='prior', sample_z=1)
        recon_loss = losses.recon_dec(inputs.text, dec_logits)
        kl_loss = losses.kl_gaussianprior(z_mu, z_logvar)
        wae_mmd_loss = losses.wae_mmd_gaussianprior(z, method='full_kernel')
        wae_mmdrf_loss = losses.wae_mmd_gaussianprior(z, method='rf')
        z_regu_losses = {'kl': kl_loss, 'mmd': wae_mmd_loss, 'mmdrf': wae_mmdrf_loss}
        z_regu_loss = z_regu_losses[cfgv.z_regu_loss]
        z_logvar_L1 = z_logvar.abs().sum(1).mean(0)  # L1 in z-dim, mean over mb.
        z_logvar_KL_penalty = losses.kl_gaussian_sharedmu(z_mu, z_logvar)
        loss = (recon_loss + beta * z_regu_loss
                + cfgv.lambda_logvar_L1 * z_logvar_L1
                + cfgv.lambda_logvar_KL * z_logvar_KL_penalty)
        trainer.zero_grad()
        loss.backward()
        grad_norm = torch.nn.utils.clip_grad_norm_(model.vae_params(), cfgv.clip_grad)
        trainer.step()
        tblog('z_mu_L1', z_mu.data.abs().mean().item())
        tblog('z_logvar', z_logvar.data.mean().item())
        tblog('z_logvar_L1', z_logvar_L1.item())
        tblog('z_logvar_KL_penalty', z_logvar_KL_penalty.item())
        tblog('L_vae', loss.item())
        tblog('L_vae_recon', recon_loss.item())
        tblog('L_vae_kl', kl_loss.item())
        tblog('L_wae_mmd', wae_mmd_loss.item())
        tblog('L_wae_mmdrf', wae_mmdrf_loss.item())
        tblog('beta', beta)
        if it % cfgv.cheaplog_every == 0 or it % cfgv.expsvlog_every == 0:
            tqdm.write(
                'ITER {} TRAINING (phase 1). loss_vae: {:.4f}; loss_recon: {:.4f}; '
                'loss_kl: {:.4f}; loss_mmd: {:.4f}; Grad_norm: {:.4e} '.format(
                    it, loss.item(), recon_loss.item(), kl_loss.item(),
                    wae_mmd_loss.item(), grad_norm))
            log_sent, _, _ = model.generate_sentences(1, sample_mode='categorical')
            tqdm.write('Sample (cat T=1.0): "{}"'.format(
                dataset.idx2sentence(log_sent.squeeze())))
            sys.stdout.flush()
        if it % cfgv.expsvlog_every == 0 and it > 0:
            save_model(model, cfgv.chkpt_path.format(it))
            # Sample 5k sentences from prior/heldout recon/.. to compute external metrics.
            # sample_kwargs from config.
            # Start and end of training: do expensive evals too.
            tier = 3 if it == cfgv.s_iter or it == cfgv.s_iter + cfgv.n_iter else 2