def build_resnet_classifier(
    batch_x: Tensor,
    batch_y: Tensor,
    z_score_x: Optional[str] = "independent",
    z_score_y: Optional[str] = "independent",
    hidden_features: int = 50,
    embedding_net_x: nn.Module = nn.Identity(),
    embedding_net_y: nn.Module = nn.Identity(),
    num_blocks: int = 2,
    dropout_probability: float = 0.0,
    use_batch_norm: bool = False,
) -> nn.Module:
    """Builds ResNet classifier.

    In SNRE, the classifier will receive batches of thetas and xs.

    Args:
        batch_x: Batch of xs, used to infer dimensionality and (optional) z-scoring.
        batch_y: Batch of ys, used to infer dimensionality and (optional) z-scoring.
        z_score_x: Whether to z-score xs passing into the network, can be one of:
            - `none`, or None: do not z-score.
            - `independent`: z-score each dimension independently.
            - `structured`: treat dimensions as related, therefore compute mean and std
                over the entire batch, instead of per-dimension. Should be used when
                each sample is, for example, a time series or an image.
        z_score_y: Whether to z-score ys passing into the network, same options as
            z_score_x.
        hidden_features: Number of hidden features.
        embedding_net_x: Optional embedding network for x.
        embedding_net_y: Optional embedding network for y.
        num_blocks: Number of residual blocks.
        dropout_probability: Dropout probability for regularization in the residual
            net.
        use_batch_norm: Whether to use batch norm in the residual net.

    Returns:
        Neural network.
    """
    check_data_device(batch_x, batch_y)
    check_embedding_net_device(embedding_net=embedding_net_x, datum=batch_x)
    check_embedding_net_device(embedding_net=embedding_net_y, datum=batch_y)

    # Infer the output dimensionalities of the embedding_nets by making a forward pass.
    x_numel = embedding_net_x(batch_x[:1]).numel()
    y_numel = embedding_net_y(batch_y[:1]).numel()

    neural_net = nets.ResidualNet(
        in_features=x_numel + y_numel,
        out_features=1,
        hidden_features=hidden_features,
        context_features=None,
        num_blocks=num_blocks,
        activation=relu,
        dropout_probability=dropout_probability,
        use_batch_norm=use_batch_norm,
    )

    input_layer = build_input_layer(
        batch_x, batch_y, z_score_x, z_score_y, embedding_net_x, embedding_net_y
    )

    neural_net = nn.Sequential(input_layer, neural_net)

    return neural_net
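
# Usage sketch (not part of the original module; the single concatenated input
# follows `build_input_layer`, which splits the tensor back into x and y
# internally before standardizing/embedding each part):
#
#     import torch
#
#     theta = torch.randn(128, 3)  # plays the role of batch_x in SNRE.
#     x = torch.randn(128, 2)      # plays the role of batch_y in SNRE.
#     classifier = build_resnet_classifier(batch_x=theta, batch_y=x)
#     logits = classifier(torch.cat([theta, x], dim=1))  # shape: (128, 1)
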
def build_mlp_classifier(
    batch_x: Tensor,
    batch_y: Tensor,
    z_score_x: Optional[str] = "independent",
    z_score_y: Optional[str] = "independent",
    hidden_features: int = 50,
    embedding_net_x: nn.Module = nn.Identity(),
    embedding_net_y: nn.Module = nn.Identity(),
) -> nn.Module:
    """Builds MLP classifier.

    In SNRE, the classifier will receive batches of thetas and xs.

    Args:
        batch_x: Batch of xs, used to infer dimensionality and (optional) z-scoring.
        batch_y: Batch of ys, used to infer dimensionality and (optional) z-scoring.
        z_score_x: Whether to z-score xs passing into the network, can be one of:
            - `none`, or None: do not z-score.
            - `independent`: z-score each dimension independently.
            - `structured`: treat dimensions as related, therefore compute mean and std
                over the entire batch, instead of per-dimension. Should be used when
                each sample is, for example, a time series or an image.
        z_score_y: Whether to z-score ys passing into the network, same options as
            z_score_x.
        hidden_features: Number of hidden features.
        embedding_net_x: Optional embedding network for x.
        embedding_net_y: Optional embedding network for y.

    Returns:
        Neural network.
    """
    check_data_device(batch_x, batch_y)
    check_embedding_net_device(embedding_net=embedding_net_x, datum=batch_x)
    check_embedding_net_device(embedding_net=embedding_net_y, datum=batch_y)

    # Infer the output dimensionalities of the embedding_nets by making a forward pass.
    x_numel = embedding_net_x(batch_x[:1]).numel()
    y_numel = embedding_net_y(batch_y[:1]).numel()

    neural_net = nn.Sequential(
        nn.Linear(x_numel + y_numel, hidden_features),
        nn.BatchNorm1d(hidden_features),
        nn.ReLU(),
        nn.Linear(hidden_features, hidden_features),
        nn.BatchNorm1d(hidden_features),
        nn.ReLU(),
        nn.Linear(hidden_features, 1),
    )

    input_layer = build_input_layer(
        batch_x, batch_y, z_score_x, z_score_y, embedding_net_x, embedding_net_y
    )

    neural_net = nn.Sequential(input_layer, neural_net)

    return neural_net
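
# Usage sketch (illustrative only). Note that this MLP uses `nn.BatchNorm1d`,
# so training-mode forward passes need more than one sample per batch:
#
#     import torch
#
#     theta = torch.randn(128, 3)
#     x = torch.randn(128, 2)
#     classifier = build_mlp_classifier(batch_x=theta, batch_y=x)
#     classifier.eval()  # BatchNorm1d in eval mode also allows single samples.
#     logits = classifier(torch.cat([theta, x], dim=1))  # shape: (128, 1)
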
def build_mnle(
    batch_x: Tensor,
    batch_y: Tensor,
    z_score_x: Optional[str] = "independent",
    z_score_y: Optional[str] = "independent",
    num_transforms: int = 2,
    num_bins: int = 5,
    hidden_features: int = 50,
    hidden_layers: int = 2,
    tail_bound: float = 10.0,
    log_transform_x: bool = True,
    **kwargs,
):
    """Returns a density estimator for mixed data types.

    Uses a categorical net to model the discrete part and a neural spline flow (NSF) to
    model the continuous part of the data.

    Args:
        batch_x: Batch of data.
        batch_y: Batch of parameters.
        z_score_x: Whether to z-score x.
        z_score_y: Whether to z-score y.
        num_transforms: Number of transforms in the NSF.
        num_bins: Bins per spline for the NSF.
        hidden_features: Number of hidden features used in both nets.
        hidden_layers: Number of hidden layers in the categorical net.
        tail_bound: Spline tail bound for the NSF.
        log_transform_x: Whether to apply a log-transform to x to move it to unbounded
            space, e.g., in case x consists of reaction time data (bounded by zero).

    Returns:
        MixedDensityEstimator: nn.Module for performing MNLE.
    """
    check_data_device(batch_x, batch_y)
    if z_score_y == "independent":
        embedding = standardizing_net(batch_y)
    else:
        embedding = None

    warnings.warn(
        "The mixed neural likelihood estimator assumes that x contains continuous "
        "data in the first n-1 columns (e.g., reaction times) and categorical data "
        "in the last column (e.g., corresponding choices). If this is not the case "
        "for the passed `x` do not use this function."
    )

    # Separate continuous and discrete data.
    cont_x, disc_x = _separate_x(batch_x)

    # Infer input and output dims.
    dim_parameters = batch_y[0].numel()
    num_categories = unique(disc_x).numel()

    # Set up a categorical RV neural net for modelling the discrete data.
    disc_nle = CategoricalNet(
        num_input=dim_parameters,
        num_categories=num_categories,
        num_hidden=hidden_features,
        num_layers=hidden_layers,
        embedding=embedding,
    )

    # Set up an NSF for modelling the continuous data, conditioned on the discrete
    # data.
    cont_nle = build_nsf(
        # Apply the log-transform manually to move x to unbounded space.
        batch_x=torch.log(cont_x) if log_transform_x else cont_x,
        batch_y=torch.cat((batch_y, disc_x), dim=1),  # Condition on discrete data too.
        z_score_y=z_score_y,
        z_score_x=z_score_x,
        num_bins=num_bins,
        num_transforms=num_transforms,
        tail_bound=tail_bound,
        hidden_features=hidden_features,
    )

    return MixedDensityEstimator(
        discrete_net=disc_nle,
        continuous_net=cont_nle,
        log_transform_x=log_transform_x,
    )
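
# Usage sketch (illustrative; assumes `MixedDensityEstimator` exposes an
# nflows-style `log_prob(x, context)`). The last column of x must hold the
# categorical data; with `log_transform_x=True` the continuous columns must be
# strictly positive:
#
#     import torch
#
#     rts = torch.rand(100, 1) + 0.1           # continuous part, e.g. reaction times.
#     choices = torch.randint(0, 2, (100, 1))  # discrete part, e.g. binary choices.
#     x = torch.cat([rts, choices.float()], dim=1)
#     theta = torch.randn(100, 4)
#     estimator = build_mnle(batch_x=x, batch_y=theta)
#     log_probs = estimator.log_prob(x, context=theta)  # shape: (100,)
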
def build_mdn(
    batch_x: Tensor,
    batch_y: Tensor,
    z_score_x: Optional[str] = "independent",
    z_score_y: Optional[str] = "independent",
    hidden_features: int = 50,
    num_components: int = 10,
    embedding_net: nn.Module = nn.Identity(),
    **kwargs,
) -> nn.Module:
    """Builds MDN p(x|y).

    Args:
        batch_x: Batch of xs, used to infer dimensionality and (optional) z-scoring.
        batch_y: Batch of ys, used to infer dimensionality and (optional) z-scoring.
        z_score_x: Whether to z-score xs passing into the network, can be one of:
            - `none`, or None: do not z-score.
            - `independent`: z-score each dimension independently.
            - `structured`: treat dimensions as related, therefore compute mean and std
                over the entire batch, instead of per-dimension. Should be used when
                each sample is, for example, a time series or an image.
        z_score_y: Whether to z-score ys passing into the network, same options as
            z_score_x.
        hidden_features: Number of hidden features.
        num_components: Number of components.
        embedding_net: Optional embedding network for y.
        kwargs: Additional arguments that are passed by the build function but are not
            relevant for MDNs and are therefore ignored.

    Returns:
        Neural network.
    """
    x_numel = batch_x[0].numel()
    # Infer the output dimensionality of the embedding_net by making a forward pass.
    check_data_device(batch_x, batch_y)
    check_embedding_net_device(embedding_net=embedding_net, datum=batch_y)
    y_numel = embedding_net(batch_y[:1]).numel()

    transform = transforms.IdentityTransform()

    z_score_x_bool, structured_x = utils.z_score_parser(z_score_x)
    if z_score_x_bool:
        transform_zx = utils.standardizing_transform(batch_x, structured_x)
        transform = transforms.CompositeTransform([transform_zx, transform])

    z_score_y_bool, structured_y = utils.z_score_parser(z_score_y)
    if z_score_y_bool:
        embedding_net = nn.Sequential(
            utils.standardizing_net(batch_y, structured_y), embedding_net
        )

    distribution = MultivariateGaussianMDN(
        features=x_numel,
        context_features=y_numel,
        hidden_features=hidden_features,
        hidden_net=nn.Sequential(
            nn.Linear(y_numel, hidden_features),
            nn.ReLU(),
            nn.Dropout(p=0.0),
            nn.Linear(hidden_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, hidden_features),
            nn.ReLU(),
        ),
        num_components=num_components,
        custom_initialization=True,
    )

    neural_net = flows.Flow(transform, distribution, embedding_net)

    return neural_net
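
# Usage sketch (illustrative; `flows.Flow` exposes the standard nflows API,
# i.e. `log_prob(inputs, context)` and `sample(num_samples, context)`):
#
#     import torch
#
#     x = torch.randn(256, 3)  # data to model.
#     y = torch.randn(256, 2)  # conditioning variables.
#     mdn = build_mdn(batch_x=x, batch_y=y, num_components=5)
#     log_probs = mdn.log_prob(x, context=y)   # shape: (256,)
#     samples = mdn.sample(10, context=y[:1])  # shape: (1, 10, 3)
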
def build_maf(
    batch_x: Tensor,
    batch_y: Tensor,
    z_score_x: Optional[str] = "independent",
    z_score_y: Optional[str] = "independent",
    hidden_features: int = 50,
    num_transforms: int = 5,
    embedding_net: nn.Module = nn.Identity(),
    num_blocks: int = 2,
    dropout_probability: float = 0.0,
    use_batch_norm: bool = False,
    **kwargs,
) -> nn.Module:
    """Builds MAF p(x|y).

    Args:
        batch_x: Batch of xs, used to infer dimensionality and (optional) z-scoring.
        batch_y: Batch of ys, used to infer dimensionality and (optional) z-scoring.
        z_score_x: Whether to z-score xs passing into the network, can be one of:
            - `none`, or None: do not z-score.
            - `independent`: z-score each dimension independently.
            - `structured`: treat dimensions as related, therefore compute mean and std
                over the entire batch, instead of per-dimension. Should be used when
                each sample is, for example, a time series or an image.
        z_score_y: Whether to z-score ys passing into the network, same options as
            z_score_x.
        hidden_features: Number of hidden features.
        num_transforms: Number of transforms.
        embedding_net: Optional embedding network for y.
        num_blocks: Number of blocks used for residual net for context embedding.
        dropout_probability: Dropout probability for regularization in residual net.
        use_batch_norm: Whether to use batch norm in residual net.
        kwargs: Additional arguments that are passed by the build function but are not
            relevant for MAF and are therefore ignored.

    Returns:
        Neural network.
    """
    x_numel = batch_x[0].numel()
    # Infer the output dimensionality of the embedding_net by making a forward pass.
    check_data_device(batch_x, batch_y)
    check_embedding_net_device(embedding_net=embedding_net, datum=batch_y)
    y_numel = embedding_net(batch_y[:1]).numel()

    if x_numel == 1:
        warn("In one-dimensional output space, this flow is limited to Gaussians")

    transform_list = []
    for _ in range(num_transforms):
        block = [
            transforms.MaskedAffineAutoregressiveTransform(
                features=x_numel,
                hidden_features=hidden_features,
                context_features=y_numel,
                num_blocks=num_blocks,
                use_residual_blocks=False,
                random_mask=False,
                activation=tanh,
                dropout_probability=dropout_probability,
                use_batch_norm=use_batch_norm,
            ),
            transforms.RandomPermutation(features=x_numel),
        ]
        transform_list += block

    z_score_x_bool, structured_x = z_score_parser(z_score_x)
    if z_score_x_bool:
        transform_list = [
            standardizing_transform(batch_x, structured_x)
        ] + transform_list

    z_score_y_bool, structured_y = z_score_parser(z_score_y)
    if z_score_y_bool:
        embedding_net = nn.Sequential(
            standardizing_net(batch_y, structured_y), embedding_net
        )

    # Combine transforms.
    transform = transforms.CompositeTransform(transform_list)

    distribution = distributions_.StandardNormal((x_numel,))
    neural_net = flows.Flow(transform, distribution, embedding_net)

    return neural_net
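
# Usage sketch (illustrative; same nflows `Flow` API as the MDN above). Each of
# the `num_transforms` blocks pairs an affine autoregressive transform with a
# random permutation so that every dimension eventually conditions on the rest:
#
#     import torch
#
#     x = torch.randn(256, 3)
#     y = torch.randn(256, 2)
#     maf = build_maf(batch_x=x, batch_y=y, num_transforms=5)
#     log_probs = maf.log_prob(x, context=y)  # shape: (256,)
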
def build_made(
    batch_x: Tensor,
    batch_y: Tensor,
    z_score_x: Optional[str] = "independent",
    z_score_y: Optional[str] = "independent",
    hidden_features: int = 50,
    num_mixture_components: int = 10,
    embedding_net: nn.Module = nn.Identity(),
    **kwargs,
) -> nn.Module:
    """Builds MADE p(x|y).

    Args:
        batch_x: Batch of xs, used to infer dimensionality and (optional) z-scoring.
        batch_y: Batch of ys, used to infer dimensionality and (optional) z-scoring.
        z_score_x: Whether to z-score xs passing into the network, can be one of:
            - `none`, or None: do not z-score.
            - `independent`: z-score each dimension independently.
            - `structured`: treat dimensions as related, therefore compute mean and std
                over the entire batch, instead of per-dimension. Should be used when
                each sample is, for example, a time series or an image.
        z_score_y: Whether to z-score ys passing into the network, same options as
            z_score_x.
        hidden_features: Number of hidden features.
        num_mixture_components: Number of mixture components.
        embedding_net: Optional embedding network for y.
        kwargs: Additional arguments that are passed by the build function but are not
            relevant for MADEs and are therefore ignored.

    Returns:
        Neural network.
    """
    x_numel = batch_x[0].numel()
    # Infer the output dimensionality of the embedding_net by making a forward pass.
    check_data_device(batch_x, batch_y)
    check_embedding_net_device(embedding_net=embedding_net, datum=batch_y)
    y_numel = embedding_net(batch_y[:1]).numel()

    if x_numel == 1:
        warn("In one-dimensional output space, this flow is limited to Gaussians")

    transform = transforms.IdentityTransform()

    z_score_x_bool, structured_x = z_score_parser(z_score_x)
    if z_score_x_bool:
        transform_zx = standardizing_transform(batch_x, structured_x)
        transform = transforms.CompositeTransform([transform_zx, transform])

    z_score_y_bool, structured_y = z_score_parser(z_score_y)
    if z_score_y_bool:
        embedding_net = nn.Sequential(
            standardizing_net(batch_y, structured_y), embedding_net
        )

    distribution = distributions_.MADEMoG(
        features=x_numel,
        hidden_features=hidden_features,
        context_features=y_numel,
        num_blocks=5,
        num_mixture_components=num_mixture_components,
        use_residual_blocks=True,
        random_mask=False,
        activation=relu,
        dropout_probability=0.0,
        use_batch_norm=False,
        custom_initialization=True,
    )

    neural_net = flows.Flow(transform, distribution, embedding_net)

    return neural_net
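
# Usage sketch (illustrative). `MADEMoG` places a mixture of Gaussians on each
# autoregressive conditional; since it is wrapped in `flows.Flow`, the usual
# nflows `log_prob(inputs, context)` interface applies:
#
#     import torch
#
#     x = torch.randn(256, 3)
#     y = torch.randn(256, 2)
#     made = build_made(batch_x=x, batch_y=y, num_mixture_components=10)
#     log_probs = made.log_prob(x, context=y)  # shape: (256,)
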
def build_nsf(
    batch_x: Tensor,
    batch_y: Tensor,
    z_score_x: Optional[str] = "independent",
    z_score_y: Optional[str] = "independent",
    hidden_features: int = 50,
    num_transforms: int = 5,
    num_bins: int = 10,
    embedding_net: nn.Module = nn.Identity(),
    tail_bound: float = 3.0,
    hidden_layers_spline_context: int = 1,
    num_blocks: int = 2,
    dropout_probability: float = 0.0,
    use_batch_norm: bool = False,
    **kwargs,
) -> nn.Module:
    """Builds NSF p(x|y).

    Args:
        batch_x: Batch of xs, used to infer dimensionality and (optional) z-scoring.
        batch_y: Batch of ys, used to infer dimensionality and (optional) z-scoring.
        z_score_x: Whether to z-score xs passing into the network, can be one of:
            - `none`, or None: do not z-score.
            - `independent`: z-score each dimension independently.
            - `structured`: treat dimensions as related, therefore compute mean and std
                over the entire batch, instead of per-dimension. Should be used when
                each sample is, for example, a time series or an image.
        z_score_y: Whether to z-score ys passing into the network, same options as
            z_score_x.
        hidden_features: Number of hidden features.
        num_transforms: Number of transforms.
        num_bins: Number of bins used for the splines.
        embedding_net: Optional embedding network for y.
        tail_bound: Tail bound for each spline.
        hidden_layers_spline_context: Number of hidden layers of the spline context net
            for one-dimensional x.
        num_blocks: Number of blocks used for residual net for context embedding.
        dropout_probability: Dropout probability for regularization in residual net.
        use_batch_norm: Whether to use batch norm in residual net.
        kwargs: Additional arguments that are passed by the build function but are not
            relevant for NSF and are therefore ignored.

    Returns:
        Neural network.
    """
    x_numel = batch_x[0].numel()
    # Infer the output dimensionality of the embedding_net by making a forward pass.
    check_data_device(batch_x, batch_y)
    check_embedding_net_device(embedding_net=embedding_net, datum=batch_y)
    y_numel = embedding_net(batch_y[:1]).numel()

    # Define mask function to alternate between predicted x-dimensions.
    def mask_in_layer(i):
        return create_alternating_binary_mask(features=x_numel, even=(i % 2 == 0))

    # If x is just a scalar then use a dummy mask and learn spline parameters using
    # the conditioning variables only.
    if x_numel == 1:
        # Conditioner ignores the data and uses the conditioning variables only.
        conditioner = partial(
            ContextSplineMap,
            hidden_features=hidden_features,
            context_features=y_numel,
            hidden_layers=hidden_layers_spline_context,
        )
    else:
        # Use conditional resnet as spline conditioner.
        conditioner = partial(
            nets.ResidualNet,
            hidden_features=hidden_features,
            context_features=y_numel,
            num_blocks=num_blocks,
            activation=relu,
            dropout_probability=dropout_probability,
            use_batch_norm=use_batch_norm,
        )

    # Stack spline transforms.
    transform_list = []
    for i in range(num_transforms):
        block = [
            transforms.PiecewiseRationalQuadraticCouplingTransform(
                mask=mask_in_layer(i) if x_numel > 1 else tensor([1], dtype=uint8),
                transform_net_create_fn=conditioner,
                num_bins=num_bins,
                tails="linear",
                tail_bound=tail_bound,
                apply_unconditional_transform=False,
            )
        ]
        # Add LU transform only for high D x. Permutation makes sense only for more
        # than one feature.
        if x_numel > 1:
            block.append(transforms.LULinear(x_numel, identity_init=True))
        transform_list += block

    z_score_x_bool, structured_x = z_score_parser(z_score_x)
    if z_score_x_bool:
        # Prepend standardizing transform to nsf transforms.
        transform_list = [
            standardizing_transform(batch_x, structured_x)
        ] + transform_list

    z_score_y_bool, structured_y = z_score_parser(z_score_y)
    if z_score_y_bool:
        # Prepend standardizing transform to y-embedding.
        embedding_net = nn.Sequential(
            standardizing_net(batch_y, structured_y), embedding_net
        )

    distribution = distributions_.StandardNormal((x_numel,))

    # Combine transforms.
    transform = transforms.CompositeTransform(transform_list)
    neural_net = flows.Flow(transform, distribution, embedding_net)

    return neural_net
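
# Usage sketch (illustrative). For one-dimensional x the builder takes the
# `ContextSplineMap` branch, i.e. spline parameters are predicted from y alone;
# for multi-dimensional x it uses alternating-mask coupling layers with LU
# linear transforms in between:
#
#     import torch
#
#     x = torch.randn(256, 1)  # scalar data triggers the ContextSplineMap branch.
#     y = torch.randn(256, 2)
#     nsf = build_nsf(batch_x=x, batch_y=y, num_bins=10, tail_bound=3.0)
#     log_probs = nsf.log_prob(x, context=y)  # shape: (256,)
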