def _test_model_impl(model, in_dim, out_dim):
    B = 4
    T = 100
    init_seed(B * T)
    x = torch.rand(B, T, in_dim)
    lengths = torch.Tensor([T] * B).long()

    # warmup forward pass
    with torch.no_grad():
        y = model(x, lengths)
        y_inf = model.inference(x, lengths)

    # MDN case
    if model.prediction_type() == PredictionType.PROBABILISTIC:
        log_pi, log_sigma, mu = y
        num_gaussian = log_pi.shape[2]
        assert mu.shape == (B, T, num_gaussian, out_dim)
        assert log_sigma.shape == (B, T, num_gaussian, out_dim)
        # NOTE: inference output shouldn't have the num_gaussian axis
        mu_inf, sigma_inf = y_inf
        assert mu_inf.shape == (B, T, out_dim)
        assert sigma_inf.shape == (B, T, out_dim)
    else:
        assert y.shape == (B, T, out_dim)
        assert y.shape == y_inf.shape
def my_app(config: DictConfig) -> None:
    global logger
    logger = getLogger(config.verbose)
    logger.info(config.pretty())

    if use_cuda:
        from torch.backends import cudnn

        cudnn.benchmark = config.train.cudnn.benchmark
        cudnn.deterministic = config.train.cudnn.deterministic
        logger.info(f"cudnn.deterministic: {cudnn.deterministic}")
        logger.info(f"cudnn.benchmark: {cudnn.benchmark}")

    logger.info(f"Random seed: {config.seed}")
    init_seed(config.seed)

    device = torch.device("cuda" if use_cuda else "cpu")

    if config.train.use_detect_anomaly:
        torch.autograd.set_detect_anomaly(True)
        logger.info("Set to use torch.autograd.detect_anomaly")

    # Model
    model = hydra.utils.instantiate(config.model.netG).to(device)

    # Optimizer
    optimizer_class = getattr(optim, config.train.optim.optimizer.name)
    optimizer = optimizer_class(
        model.parameters(), **config.train.optim.optimizer.params
    )

    # Scheduler
    lr_scheduler_class = getattr(
        optim.lr_scheduler, config.train.optim.lr_scheduler.name
    )
    lr_scheduler = lr_scheduler_class(
        optimizer, **config.train.optim.lr_scheduler.params
    )

    data_loaders = get_data_loaders(config)

    # Resume
    if (
        config.train.resume.checkpoint is not None
        and len(config.train.resume.checkpoint) > 0
    ):
        logger.info("Load weights from {}".format(config.train.resume.checkpoint))
        checkpoint = torch.load(to_absolute_path(config.train.resume.checkpoint))
        model.load_state_dict(checkpoint["state_dict"])
        if config.train.resume.load_optimizer:
            logger.info("Load optimizer state")
            optimizer.load_state_dict(checkpoint["optimizer_state"])
            lr_scheduler.load_state_dict(checkpoint["lr_scheduler_state"])

    # Save model definition
    out_dir = to_absolute_path(config.train.out_dir)
    os.makedirs(out_dir, exist_ok=True)
    with open(join(out_dir, "model.yaml"), "w") as f:
        OmegaConf.save(config.model, f)

    # Run training loop
    train_loop(config, device, model, optimizer, lr_scheduler, data_loaders)
def _test_model_impl(model, in_dim):
    B = 4
    T = 100
    init_seed(B * T)
    x = torch.rand(B, T, in_dim)
    lengths = torch.Tensor([T] * B).long()

    # warmup forward pass
    with torch.no_grad():
        y = model(x, lengths)
        y_inf = model.inference(x, lengths)

    assert y.shape == (B, T, in_dim)
    assert y.shape == y_inf.shape
def _test_postfilter_impl(model, model_config):
    B = 4
    T = 100
    init_seed(B * T)
    in_dim = sum(model_config.stream_sizes)
    x = torch.rand(B, T, in_dim)
    lengths = torch.Tensor([T] * B).long()

    # warmup forward pass
    with torch.no_grad():
        y = model(x, lengths)
        y_inf = model.inference(x, lengths)

    assert x.shape == y.shape
    assert y_inf.shape == y.shape
def setUpClass(self):
    init_seed(42)

    # Generate data for the inverse problem described in the PRML book, p. 273
    # https://www.microsoft.com/en-us/research/people/cmbishop/prml-book/
    n = 2500
    self.d_in = 1
    self.d_out = 1
    x_train = np.random.uniform(0, 1, (n, self.d_in)).astype(np.float32)
    noise = np.random.uniform(-0.1, 0.1, (n, self.d_in)).astype(np.float32)
    y_train = x_train + 0.3 * np.sin(2 * np.pi * x_train) + noise
    self.x_train_inv = y_train
    self.y_train_inv = x_train
    self.x_test = np.array([0.0, 0.2, 0.5, 0.8, 1.0]).astype(np.float32)
    # [lower_limit, upper_limit] corresponding to x_test
    self.y_test_range = np.array(
        [[-0.5, 1], [-0.5, 2.0], [0.2, 0.9], [0.8, 1.0], [0.85, 1.05]]
    ).astype(np.float32)

    hidden_dim = 50
    num_gaussians = 30
    num_layers = 0
    self.batch_size = n

    use_cuda = torch.cuda.is_available()
    self.device = torch.device("cuda" if use_cuda else "cpu")
    self.model = MDN(
        self.d_in,
        hidden_dim,
        self.d_out,
        num_layers=num_layers,
        num_gaussians=num_gaussians,
    ).to(self.device)
    learning_rate = 0.008
    self.opt = optim.Adam(self.model.parameters(), lr=learning_rate)
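
# A minimal training-step sketch for the MDN fixture above. It assumes a
# companion mdn_loss(log_pi, log_sigma, mu, target) helper returning the
# negative log-likelihood; the exact forward/loss signatures are assumptions
# for illustration, not a definitive part of the test suite.
def _mdn_train_step_sketch(model, opt, x, y):
    model.train()
    opt.zero_grad()
    log_pi, log_sigma, mu = model(x)  # assumed to return mixture parameters
    loss = mdn_loss(log_pi, log_sigma, mu, y)  # assumed NLL helper
    loss.backward()
    opt.step()
    return loss.item()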
def _test_model_impl(model, model_config):
    B = 4
    T = 100
    init_seed(B * T)
    x = torch.rand(B, T, model_config.netG.in_dim)
    y = torch.rand(B, T, model_config.netG.out_dim)
    lengths = torch.Tensor([T] * B).long()

    # warmup forward pass
    with torch.no_grad():
        if model.is_autoregressive():
            outs = model(x, lengths, y)
        else:
            outs = model(x, lengths)
        if isinstance(outs, tuple) and len(outs) == 2:
            y, lf0_residual = outs
        else:
            y, lf0_residual = outs, None
        y_inf = model.inference(x, lengths)

    # MDN case
    if model.prediction_type() == PredictionType.PROBABILISTIC:
        log_pi, log_sigma, mu = y
        num_gaussian = log_pi.shape[2]
        assert mu.shape == (B, T, num_gaussian, model_config.netG.out_dim)
        assert log_sigma.shape == (B, T, num_gaussian, model_config.netG.out_dim)
        if lf0_residual is not None:
            assert lf0_residual.shape == (B, T, num_gaussian)
        # NOTE: inference output shouldn't have the num_gaussian axis
        mu_inf, sigma_inf = y_inf
        assert mu_inf.shape == (B, T, model_config.netG.out_dim)
        assert sigma_inf.shape == (B, T, model_config.netG.out_dim)
    else:
        if lf0_residual is not None:
            if isinstance(lf0_residual, list):
                lf0_residual = lf0_residual[-1]
            assert lf0_residual.shape == (B, T, 1)
        # NOTE: some models have multiple outputs (e.g. Tacotron)
        if isinstance(y, list):
            y = y[-1]
        assert y.shape == (B, T, model_config.netG.out_dim)
        assert y.shape == y_inf.shape
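
# A hypothetical usage sketch of the helper above: the target class and
# dimensions are illustrative assumptions, not fixtures from this repository.
# Any model exposing forward()/inference() and the prediction_type()/
# is_autoregressive() hooks can be checked this way.
def _test_model_usage_sketch():
    model_config = OmegaConf.create(
        {"netG": {"in_dim": 86, "out_dim": 67}}  # hypothetical dimensions
    )
    netG_config = {
        "_target_": "mypackage.models.MyAcousticModel",  # hypothetical target
        "in_dim": model_config.netG.in_dim,
        "out_dim": model_config.netG.out_dim,
    }
    model = hydra.utils.instantiate(netG_config)
    _test_model_impl(model, model_config)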
def setup_cyclegan(config, device, collate_fn=collate_fn_default):
    """Setup for training CycleGAN

    Args:
        config (dict): configuration for training
        device (torch.device): device to use for training
        collate_fn (callable, optional): collate function. Defaults to collate_fn_default.

    Returns:
        (tuple): tuple containing the generator models and their optimizer/scheduler,
            the discriminator models and their optimizer/scheduler, gradient scaler,
            data loaders, tensorboard writer, logger, and scalers.
    """
    logger = getLogger(config.verbose)
    logger.info(OmegaConf.to_yaml(config))
    logger.info(f"PyTorch version: {torch.__version__}")

    if torch.cuda.is_available():
        from torch.backends import cudnn

        cudnn.benchmark = config.train.cudnn.benchmark
        cudnn.deterministic = config.train.cudnn.deterministic
        logger.info(f"cudnn.deterministic: {cudnn.deterministic}")
        logger.info(f"cudnn.benchmark: {cudnn.benchmark}")
        if torch.backends.cudnn.version() is not None:
            logger.info(f"cuDNN version: {torch.backends.cudnn.version()}")

    logger.info(f"Random seed: {config.seed}")
    init_seed(config.seed)

    if config.train.use_detect_anomaly:
        torch.autograd.set_detect_anomaly(True)
        logger.info("Set to use torch.autograd.detect_anomaly")

    if "use_amp" in config.train and config.train.use_amp:
        logger.info("Use mixed precision training")
        grad_scaler = GradScaler()
    else:
        grad_scaler = None

    # Model G
    netG_A2B = hydra.utils.instantiate(config.model.netG).to(device)
    netG_B2A = hydra.utils.instantiate(config.model.netG).to(device)
    logger.info(
        "[Generator] Number of trainable params: {:.3f} million".format(
            num_trainable_params(netG_A2B) / 1000000.0
        )
    )
    logger.info(netG_A2B)

    # Optimizer and LR scheduler for G
    optG, schedulerG = _instantiate_optim_cyclegan(
        config.train.optim.netG, netG_A2B, netG_B2A
    )

    # Model D
    netD_A = hydra.utils.instantiate(config.model.netD).to(device)
    netD_B = hydra.utils.instantiate(config.model.netD).to(device)
    logger.info(
        "[Discriminator] Number of trainable params: {:.3f} million".format(
            num_trainable_params(netD_A) / 1000000.0
        )
    )
    logger.info(netD_A)

    # Optimizer and LR scheduler for D
    optD, schedulerD = _instantiate_optim_cyclegan(
        config.train.optim.netD, netD_A, netD_B
    )

    # DataLoader
    data_loaders = get_data_loaders(config.data, collate_fn, logger)

    set_epochs_based_on_max_steps_(
        config.train, len(data_loaders["train_no_dev"]), logger
    )

    # Resume
    # TODO
    # _resume(logger, config.train.resume.netG, netG, optG, schedulerG)
    # _resume(logger, config.train.resume.netD, netD, optD, schedulerD)

    if config.data_parallel:
        netG_A2B = nn.DataParallel(netG_A2B)
        netG_B2A = nn.DataParallel(netG_B2A)
        netD_A = nn.DataParallel(netD_A)
        netD_B = nn.DataParallel(netD_B)

    # Mlflow
    if config.mlflow.enabled:
        mlflow.set_tracking_uri("file://" + get_original_cwd() + "/mlruns")
        mlflow.set_experiment(config.mlflow.experiment)
        # NOTE: disable tensorboard if mlflow is enabled
        writer = None
        logger.info("Using mlflow instead of tensorboard")
    else:
        # Tensorboard
        writer = SummaryWriter(to_absolute_path(config.train.log_dir))

    # Scalers
    if "in_scaler_path" in config.data and config.data.in_scaler_path is not None:
        in_scaler = joblib.load(to_absolute_path(config.data.in_scaler_path))
        if isinstance(in_scaler, SKMinMaxScaler):
            in_scaler = MinMaxScaler(
                in_scaler.min_,
                in_scaler.scale_,
                in_scaler.data_min_,
                in_scaler.data_max_,
            )
    else:
        in_scaler = None
    if "out_scaler_path" in config.data and config.data.out_scaler_path is not None:
        out_scaler = joblib.load(to_absolute_path(config.data.out_scaler_path))
        out_scaler = StandardScaler(
            out_scaler.mean_, out_scaler.var_, out_scaler.scale_
        )
    else:
        out_scaler = None

    return (
        (netG_A2B, netG_B2A, optG, schedulerG),
        (netD_A, netD_B, optD, schedulerD),
        grad_scaler,
        data_loaders,
        writer,
        logger,
        in_scaler,
        out_scaler,
    )
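
# A hedged usage sketch (not part of the original script): how a CycleGAN
# training entry point might unpack the nested tuple returned by
# setup_cyclegan(). The _train_cyclegan_loop name is a hypothetical
# placeholder for the actual training loop.
def _cyclegan_entry_sketch(config, device):
    (
        (netG_A2B, netG_B2A, optG, schedulerG),
        (netD_A, netD_B, optD, schedulerD),
        grad_scaler,
        data_loaders,
        writer,
        logger,
        in_scaler,
        out_scaler,
    ) = setup_cyclegan(config, device)
    _train_cyclegan_loop(  # hypothetical training loop entry
        config, device,
        (netG_A2B, netG_B2A, optG, schedulerG),
        (netD_A, netD_B, optD, schedulerD),
        grad_scaler, data_loaders, writer, logger, in_scaler, out_scaler,
    )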
def setup(config, device, collate_fn=collate_fn_default):
    """Setup for training

    Args:
        config (dict): configuration for training
        device (torch.device): device to use for training
        collate_fn (callable, optional): collate function. Defaults to collate_fn_default.

    Returns:
        (tuple): tuple containing model, optimizer, learning rate scheduler,
            gradient scaler, data loaders, tensorboard writer, logger, and scalers.
    """
    logger = getLogger(config.verbose)
    logger.info(OmegaConf.to_yaml(config))
    logger.info(f"PyTorch version: {torch.__version__}")

    if torch.cuda.is_available():
        from torch.backends import cudnn

        cudnn.benchmark = config.train.cudnn.benchmark
        cudnn.deterministic = config.train.cudnn.deterministic
        logger.info(f"cudnn.deterministic: {cudnn.deterministic}")
        logger.info(f"cudnn.benchmark: {cudnn.benchmark}")
        if torch.backends.cudnn.version() is not None:
            logger.info(f"cuDNN version: {torch.backends.cudnn.version()}")

    logger.info(f"Random seed: {config.seed}")
    init_seed(config.seed)

    if config.train.use_detect_anomaly:
        torch.autograd.set_detect_anomaly(True)
        logger.info("Set to use torch.autograd.detect_anomaly")

    if "use_amp" in config.train and config.train.use_amp:
        logger.info("Use mixed precision training")
        grad_scaler = GradScaler()
    else:
        grad_scaler = None

    # Model
    model = hydra.utils.instantiate(config.model.netG).to(device)
    logger.info(
        "Number of trainable params: {:.3f} million".format(
            num_trainable_params(model) / 1000000.0
        )
    )
    logger.info(model)

    # Optimizer
    optimizer_class = getattr(optim, config.train.optim.optimizer.name)
    optimizer = optimizer_class(
        model.parameters(), **config.train.optim.optimizer.params
    )

    # Scheduler
    lr_scheduler_class = getattr(
        optim.lr_scheduler, config.train.optim.lr_scheduler.name
    )
    lr_scheduler = lr_scheduler_class(
        optimizer, **config.train.optim.lr_scheduler.params
    )

    # DataLoader
    data_loaders = get_data_loaders(config.data, collate_fn, logger)

    set_epochs_based_on_max_steps_(
        config.train, len(data_loaders["train_no_dev"]), logger
    )

    # Resume
    if (
        config.train.resume.checkpoint is not None
        and len(config.train.resume.checkpoint) > 0
    ):
        logger.info("Load weights from %s", config.train.resume.checkpoint)
        checkpoint = torch.load(to_absolute_path(config.train.resume.checkpoint))
        model.load_state_dict(checkpoint["state_dict"])
        if config.train.resume.load_optimizer:
            logger.info("Load optimizer state")
            optimizer.load_state_dict(checkpoint["optimizer_state"])
            lr_scheduler.load_state_dict(checkpoint["lr_scheduler_state"])

    if config.data_parallel:
        model = nn.DataParallel(model)

    # Mlflow
    if config.mlflow.enabled:
        mlflow.set_tracking_uri("file://" + get_original_cwd() + "/mlruns")
        mlflow.set_experiment(config.mlflow.experiment)
        # NOTE: disable tensorboard if mlflow is enabled
        writer = None
        logger.info("Using mlflow instead of tensorboard")
    else:
        # Tensorboard
        writer = SummaryWriter(to_absolute_path(config.train.log_dir))

    # Scalers
    if "in_scaler_path" in config.data and config.data.in_scaler_path is not None:
        in_scaler = joblib.load(to_absolute_path(config.data.in_scaler_path))
        in_scaler = MinMaxScaler(
            in_scaler.min_, in_scaler.scale_, in_scaler.data_min_, in_scaler.data_max_
        )
    else:
        in_scaler = None
    if "out_scaler_path" in config.data and config.data.out_scaler_path is not None:
        out_scaler = joblib.load(to_absolute_path(config.data.out_scaler_path))
        out_scaler = StandardScaler(
            out_scaler.mean_, out_scaler.var_, out_scaler.scale_
        )
    else:
        out_scaler = None

    return (
        model,
        optimizer,
        lr_scheduler,
        grad_scaler,
        data_loaders,
        writer,
        logger,
        in_scaler,
        out_scaler,
    )
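
# A minimal entry-point sketch (illustrative only): setup() is typically
# driven from a Hydra config. The config_path/config_name values and the
# train_loop signature below are assumptions, not the repository's actual
# entry point.
@hydra.main(config_path="conf/train", config_name="config")
def _train_entry_sketch(config: DictConfig) -> None:
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    (
        model,
        optimizer,
        lr_scheduler,
        grad_scaler,
        data_loaders,
        writer,
        logger,
        in_scaler,
        out_scaler,
    ) = setup(config, device)
    # train_loop is assumed to be defined elsewhere in the training script;
    # its argument order here is an assumption for illustration.
    train_loop(config, device, model, optimizer, lr_scheduler, grad_scaler,
               data_loaders, writer, in_scaler, out_scaler, logger)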