def build_made(
    batch_x: Tensor = None,
    batch_y: Tensor = None,
    z_score_x: bool = True,
    z_score_y: bool = True,
    hidden_features: int = 50,
    num_blocks: int = 5,
    num_mixture_components: int = 10,
    embedding_net: nn.Module = nn.Identity(),
) -> nn.Module:
    """Builds MADE p(x|y).

    Args:
        batch_x: Batch of xs, used to infer dimensionality and (optional) z-scoring.
            Required, despite the `None` default.
        batch_y: Batch of ys, used to infer dimensionality and (optional) z-scoring.
            Required, despite the `None` default.
        z_score_x: Whether to z-score xs passing into the network.
        z_score_y: Whether to z-score ys passing into the network.
        hidden_features: Number of hidden features.
        num_blocks: Number of MADE blocks.
        num_mixture_components: Number of mixture components.
        embedding_net: Optional embedding network for y.

    Returns:
        Neural network.

    Raises:
        TypeError: If `batch_x` or `batch_y` is not provided.
    """
    if batch_x is None or batch_y is None:
        raise TypeError(
            "build_made requires both batch_x and batch_y to infer the input "
            "dimensionalities."
        )

    x_numel = batch_x[0].numel()
    # The distribution is conditioned on the *embedded* y, so the context size
    # has to be the output size of the embedding net, not the raw size of y.
    # It is inferred with a forward pass on a single datum.
    y_numel = embedding_net(batch_y[:1]).numel()

    if x_numel == 1:
        warn("In one-dimensional output space, this flow is limited to Gaussians")

    transform = transforms.IdentityTransform()

    if z_score_x:
        transform_zx = standardizing_transform(batch_x)
        transform = transforms.CompositeTransform([transform_zx, transform])

    if z_score_y:
        # Standardization is prepended, so the embedding net sees z-scored ys.
        embedding_net = nn.Sequential(standardizing_net(batch_y), embedding_net)

    distribution = distributions_.MADEMoG(
        features=x_numel,
        hidden_features=hidden_features,
        context_features=y_numel,
        num_blocks=num_blocks,
        num_mixture_components=num_mixture_components,
        use_residual_blocks=True,
        random_mask=False,
        activation=relu,
        dropout_probability=0.0,
        use_batch_norm=False,
        custom_initialization=True,
    )

    neural_net = flows.Flow(transform, distribution, embedding_net)

    return neural_net
def posterior_nn(
    model: str,
    prior: torch.distributions.Distribution,
    context: torch.Tensor,
    embedding: Optional[torch.nn.Module] = None,
    hidden_features: int = 50,
    mdn_num_components: int = 20,
    made_num_mixture_components: int = 10,
    made_num_blocks: int = 4,
    flow_num_transforms: int = 5,
) -> torch.nn.Module:
    """Neural posterior density estimator

    Args:
        model: Model, one of maf / mdn / made / nsf
        prior: Prior distribution. Its `mean` and `stddev` are used to standardize
            parameters, and `prior.sample([1])` must be 2D so that the parameter
            dimension can be read from its second axis.
        context: Observation
        embedding: Embedding network. Passed to the made / maf / nsf flows; the
            mdn branch does not use it.
        hidden_features: For all, number of hidden features
        mdn_num_components: For MDNs only, number of components
        made_num_mixture_components: For MADEs only, number of mixture components
        made_num_blocks: For MADEs only, number of blocks
        flow_num_transforms: For flows only, number of transforms

    Returns:
        Neural network

    Raises:
        ValueError: If `model` is not one of the supported model names.
    """
    supported_models = ("mdn", "made", "maf", "nsf")
    # Fail fast with an informative message, before touching prior or context.
    if model not in supported_models:
        raise ValueError(
            f"Unknown model {model!r}; expected one of {supported_models}."
        )

    mean, std = (prior.mean, prior.stddev)
    # Affine map (theta - mean) / std, written as shift and scale.
    prior_standardization = transforms.AffineTransform(
        shift=-mean / std, scale=1 / std
    )

    parameter_dim = prior.sample([1]).shape[1]

    context = utils.torchutils.atleast_2d(context)
    # Plain Python int (number of elements per observation), since it is used as
    # a layer size below.
    # NOTE(review): this is the size of the raw context, not of the embedding
    # output -- assumes `embedding` preserves the feature count; TODO confirm.
    observation_dim = context[0].numel()

    if model == "mdn":
        neural_net = MultivariateGaussianMDN(
            features=parameter_dim,
            context_features=observation_dim,
            hidden_features=hidden_features,
            hidden_net=nn.Sequential(
                nn.Linear(observation_dim, hidden_features),
                nn.ReLU(),
                nn.Dropout(p=0.0),
                nn.Linear(hidden_features, hidden_features),
                nn.ReLU(),
                nn.Linear(hidden_features, hidden_features),
                nn.ReLU(),
            ),
            num_components=mdn_num_components,
            custom_initialization=True,
        )

    elif model == "made":
        transform = prior_standardization
        distribution = distributions_.MADEMoG(
            features=parameter_dim,
            hidden_features=hidden_features,
            context_features=observation_dim,
            num_blocks=made_num_blocks,
            num_mixture_components=made_num_mixture_components,
            use_residual_blocks=True,
            random_mask=False,
            activation=torch.relu,
            dropout_probability=0.0,
            use_batch_norm=False,
            custom_initialization=True,
        )
        neural_net = flows.Flow(transform, distribution, embedding)

    elif model == "maf":
        # Stack of (masked autoregressive transform, random permutation) pairs.
        transform = transforms.CompositeTransform(
            [
                transforms.CompositeTransform(
                    [
                        transforms.MaskedAffineAutoregressiveTransform(
                            features=parameter_dim,
                            hidden_features=hidden_features,
                            context_features=observation_dim,
                            num_blocks=2,
                            use_residual_blocks=False,
                            random_mask=False,
                            activation=torch.tanh,
                            dropout_probability=0.0,
                            use_batch_norm=True,
                        ),
                        transforms.RandomPermutation(features=parameter_dim),
                    ]
                )
                for _ in range(flow_num_transforms)
            ]
        )

        transform = transforms.CompositeTransform([prior_standardization, transform])

        distribution = distributions_.StandardNormal((parameter_dim,))
        neural_net = flows.Flow(transform, distribution, embedding)

    else:  # model == "nsf", guaranteed by the validation above

        def make_transform_net(in_features, out_features):
            # Conditioner network used inside each coupling transform.
            return nets.ResidualNet(
                in_features=in_features,
                out_features=out_features,
                hidden_features=hidden_features,
                context_features=observation_dim,
                num_blocks=2,
                activation=torch.relu,
                dropout_probability=0.0,
                use_batch_norm=False,
            )

        # Stack of (spline coupling, LU-linear) pairs; the coupling mask
        # alternates between consecutive layers.
        transform = transforms.CompositeTransform(
            [
                transforms.CompositeTransform(
                    [
                        transforms.PiecewiseRationalQuadraticCouplingTransform(
                            mask=create_alternating_binary_mask(
                                features=parameter_dim, even=(i % 2 == 0)
                            ),
                            transform_net_create_fn=make_transform_net,
                            num_bins=10,
                            tails="linear",
                            tail_bound=3.0,
                            apply_unconditional_transform=False,
                        ),
                        transforms.LULinear(parameter_dim, identity_init=True),
                    ]
                )
                for i in range(flow_num_transforms)
            ]
        )

        transform = transforms.CompositeTransform([prior_standardization, transform])

        distribution = distributions_.StandardNormal((parameter_dim,))
        neural_net = flows.Flow(transform, distribution, embedding)

    return neural_net
def build_made(
    batch_x: Tensor,
    batch_y: Tensor,
    z_score_x: Optional[str] = "independent",
    z_score_y: Optional[str] = "independent",
    hidden_features: int = 50,
    num_mixture_components: int = 10,
    embedding_net: nn.Module = nn.Identity(),
    **kwargs,
) -> nn.Module:
    """Construct a MADE-based conditional density estimator p(x|y).

    Args:
        batch_x: Batch of xs; its first entry determines the number of features
            and the batch is used for (optional) z-scoring.
        batch_y: Batch of ys; used to infer the context size and for (optional)
            z-scoring.
        z_score_x: How xs are z-scored before entering the network, one of:
            - `none`, or None: no z-scoring.
            - `independent`: every dimension is z-scored on its own.
            - `structured`: dimensions are treated as related, so mean and std
              are computed over the entire batch rather than per dimension. Use
              this when each sample is, for example, a time series or an image.
        z_score_y: How ys are z-scored, same options as `z_score_x`.
        hidden_features: Number of hidden features.
        num_mixture_components: Number of mixture components.
        embedding_net: Optional embedding network for y.
        kwargs: Extra arguments handed over by the build function that are not
            relevant for MADEs; they are ignored.

    Returns:
        Neural network.
    """
    num_x_features = batch_x[0].numel()

    # The context size is whatever the embedding net emits for a single y, so it
    # is measured with a forward pass after the device checks.
    check_data_device(batch_x, batch_y)
    check_embedding_net_device(embedding_net=embedding_net, datum=batch_y)
    num_context_features = embedding_net(batch_y[:1]).numel()

    if x_numel_is_one := (num_x_features == 1):
        warn("In one-dimensional output space, this flow is limited to Gaussians")

    # Start from the identity and put the x-standardization in front if requested.
    flow_transform = transforms.IdentityTransform()
    zscore_x, x_is_structured = z_score_parser(z_score_x)
    if zscore_x:
        flow_transform = transforms.CompositeTransform(
            [standardizing_transform(batch_x, x_is_structured), flow_transform]
        )

    # y-standardization, if requested, runs before the embedding net.
    zscore_y, y_is_structured = z_score_parser(z_score_y)
    if zscore_y:
        embedding_net = nn.Sequential(
            standardizing_net(batch_y, y_is_structured), embedding_net
        )

    made_mog = distributions_.MADEMoG(
        features=num_x_features,
        hidden_features=hidden_features,
        context_features=num_context_features,
        num_blocks=5,
        num_mixture_components=num_mixture_components,
        use_residual_blocks=True,
        random_mask=False,
        activation=relu,
        dropout_probability=0.0,
        use_batch_norm=False,
        custom_initialization=True,
    )

    return flows.Flow(flow_transform, made_mog, embedding_net)