import argparse
import os

import torch
from torch import nn
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
from tqdm import tqdm

# Project-local imports (module paths assumed; they are not shown in the original snippet):
from preprocess import get_post_dataset, collate_fn_postnet
from network import ModelPostNet
import hyperparams as hp


def main():
    """Train the PostNet (mel -> linear magnitude) from scratch with MSE loss."""
    dataset = get_post_dataset()
    global_step = 0

    m = nn.DataParallel(ModelPostNet().cuda())
    m.train()

    optimizer = torch.optim.Adam(m.parameters(), lr=hp.lr)
    writer = SummaryWriter()

    for epoch in range(hp.epochs):
        dataloader = DataLoader(dataset,
                                batch_size=hp.batch_size,
                                shuffle=True,
                                collate_fn=collate_fn_postnet,
                                drop_last=True,
                                num_workers=0)
        pbar = tqdm(dataloader)
        for i, data in enumerate(pbar):
            pbar.set_description("Processing at epoch %d" % epoch)
            global_step += 1
            # Follow the learning-rate schedule for the first 400k steps.
            if global_step < 400000:
                adjust_learning_rate(optimizer, global_step)

            mel, mag = data
            mel = mel.cuda()
            mag = mag.cuda()

            mag_pred = m(mel)
            loss = nn.MSELoss()(mag_pred, mag)

            if global_step % 10 == 0:
                print('total_loss ==', loss.item())
            # Log a Python float, not a live tensor, to the writer.
            writer.add_scalars('training_loss', {'loss': loss.item()}, global_step)

            optimizer.zero_grad()
            # Calculate gradients.
            loss.backward()
            nn.utils.clip_grad_norm_(m.parameters(), 1.)
            # Update weights.
            optimizer.step()

            if global_step % hp.save_step == 0:
                torch.save({'model': m.state_dict(),
                            'optimizer': optimizer.state_dict()},
                           os.path.join(hp.checkpoint_path,
                                        'checkpoint_postnet_%d.pth.tar' % global_step))
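# adjust_learning_rate() is called by both trainers but not defined in this
# snippet. A minimal sketch, assuming the usual Noam-style warmup-then-decay
# schedule (warmup_step=4000 is an assumed default, not confirmed here):
def adjust_learning_rate(optimizer, step_num, warmup_step=4000):
    # Ramp up linearly for warmup_step steps, then decay as step_num ** -0.5;
    # the two branches meet at step_num == warmup_step, where lr == hp.lr.
    lr = hp.lr * warmup_step ** 0.5 * min(step_num * warmup_step ** -1.5,
                                          step_num ** -0.5)
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr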
def main():
    """Train the PostNet with L1 loss, resuming from a saved checkpoint.

    Note: this redefines main() and shadows the MSE variant above if both
    definitions are kept in the same module.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--step', type=int, default=0,
                        help='Global step to restore checkpoint')
    args = parser.parse_args()

    dataset = get_post_dataset()
    global_step = args.step

    # Deliberately skip GPU 0: the model lives on cuda:1 and DataParallel
    # replicates it across GPUs 1-7.
    m = nn.DataParallel(ModelPostNet().cuda(1), device_ids=[i + 1 for i in range(7)])

    if not os.path.exists(hp.checkpoint_path):
        os.makedirs(hp.checkpoint_path)

    # Restore model weights before training; the matching optimizer state is
    # restored below, once the optimizer exists.
    if args.step > 0:
        ckpt_path = os.path.join(hp.checkpoint_path,
                                 'checkpoint_postnet_%d.pth.tar' % global_step)
        ckpt = torch.load(ckpt_path)
        m.load_state_dict(ckpt['model'])

    m.train()
    optimizer = torch.optim.Adam(m.parameters(), lr=hp.lr)
    if args.step > 0:
        optimizer.load_state_dict(ckpt['optimizer'])

    writer = SummaryWriter()

    for epoch in range(hp.epochs):
        dataloader = DataLoader(dataset,
                                batch_size=hp.batch_size,
                                shuffle=True,
                                collate_fn=collate_fn_postnet,
                                drop_last=True,
                                num_workers=8)
        pbar = tqdm(dataloader)
        for i, data in enumerate(pbar):
            pbar.set_description("Processing at epoch %d" % epoch)
            global_step += 1
            if global_step < 400000:
                adjust_learning_rate(optimizer, global_step)

            mel, mag = data
            mel = mel.cuda(1)
            mag = mag.cuda(1)

            mag_pred = m(mel)
            loss = nn.L1Loss()(mag_pred, mag)

            writer.add_scalars('training_loss', {'loss': loss.item()}, global_step)

            optimizer.zero_grad()
            loss.backward()
            nn.utils.clip_grad_norm_(m.parameters(), 1.)
            optimizer.step()

            if global_step % hp.save_step_post == 0:
                torch.save({'model': m.state_dict(),
                            'optimizer': optimizer.state_dict()},
                           os.path.join(hp.checkpoint_path,
                                        'checkpoint_postnet_%d.pth.tar' % global_step))
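# Entry point (assumed; the original snippet does not show one). With both
# definitions in one module, this runs the checkpoint-restoring L1 trainer,
# e.g.:  python train_postnet.py --step 100000
if __name__ == '__main__':
    main()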