def conditional_spline(input_dim, context_dim, hidden_dims=None, count_bins=8,
                       bound=3.0, order='linear'):
    r"""Construct a :class:`~pyro.distributions.transforms.ConditionalSpline`
    together with a dense hypernetwork of the right input/output dimensions.

    :param input_dim: Dimension of input variable
    :type input_dim: int
    :param context_dim: Dimension of context variable
    :type context_dim: int
    :param hidden_dims: Hidden dimensions of the dense network; defaults to
        [input_dim * 10, input_dim * 10]
    :type hidden_dims: list[int]
    :param count_bins: Number of segments comprising the spline.
    :type count_bins: int
    :param bound: The quantity :math:`K` determining the bounding box
        :math:`[-K,K]\times[-K,K]` of the spline.
    :type bound: float
    :param order: One of ['linear', 'quadratic'] specifying the spline order.
    :type order: string
    """
    if hidden_dims is None:
        hidden_dims = [input_dim * 10, input_dim * 10]
    # Widths, heights and internal derivatives are common to both orders;
    # the linear order additionally needs the lambda parameters.
    param_dims = [
        input_dim * count_bins,
        input_dim * count_bins,
        input_dim * (count_bins - 1),
    ]
    if order == 'linear':
        param_dims.append(input_dim * count_bins)
    elif order != 'quadratic':
        raise ValueError(
            "Keyword argument 'order' must be one of ['linear', 'quadratic'], but '{}' was found!"
            .format(order))
    hypernet = DenseNN(context_dim, hidden_dims, param_dims=param_dims)
    return ConditionalSpline(hypernet, input_dim, count_bins, bound=bound,
                             order=order)
def __init__(self, **kwargs):
    """Build the conditional affine flows for each covariate of the model.

    Each covariate gets a small DenseNN hypernet producing the two affine
    parameters (param_dims=[1, 1]), wrapped in a ConditionalAffineTransform,
    followed by the covariate's pre-built constraint transform (defined on the
    parent class — presumably in super().__init__; not visible here).
    """
    super().__init__(**kwargs)
    # Shared activation for all hypernets; Swish is optional via config flag.
    nonlinearity = Swish() if self.use_swish else nn.LeakyReLU(0.1)
    # brain_volume: conditioned on 2 context variables.
    brain_volume_net = DenseNN(2, [8, 16], param_dims=[1, 1],
                               nonlinearity=nonlinearity)
    self.brain_volume_flow_components = ConditionalAffineTransform(
        context_nn=brain_volume_net, event_dim=0)
    self.brain_volume_flow_transforms = [
        self.brain_volume_flow_components,
        self.brain_volume_flow_constraint_transforms
    ]
    # ventricle_volume: conditioned on 3 context variables.
    ventricle_volume_net = DenseNN(3, [12, 20], param_dims=[1, 1],
                                   nonlinearity=nonlinearity)
    self.ventricle_volume_flow_components = ConditionalAffineTransform(
        context_nn=ventricle_volume_net, event_dim=0)
    self.ventricle_volume_flow_transforms = [
        self.ventricle_volume_flow_components,
        self.ventricle_volume_flow_constraint_transforms
    ]
    # lesion_volume: conditioned on 4 context variables.
    lesion_volume_net = DenseNN(4, [16, 24], param_dims=[1, 1],
                                nonlinearity=nonlinearity)
    self.lesion_volume_flow_components = ConditionalAffineTransform(
        context_nn=lesion_volume_net, event_dim=0)
    self.lesion_volume_flow_transforms = [
        self.lesion_volume_flow_components,
        self.lesion_volume_flow_constraint_transforms
    ]
    # duration: conditioned on 2 context variables.
    duration_net = DenseNN(2, [8, 16], param_dims=[1, 1],
                           nonlinearity=nonlinearity)
    self.duration_flow_components = ConditionalAffineTransform(
        context_nn=duration_net, event_dim=0)
    self.duration_flow_transforms = [
        self.duration_flow_components,
        self.duration_flow_constraint_transforms
    ]
    # edss score: conditioned on 2 context variables.
    edss_net = DenseNN(2, [8, 16], param_dims=[1, 1],
                       nonlinearity=nonlinearity)
    self.edss_flow_components = ConditionalAffineTransform(
        context_nn=edss_net, event_dim=0)
    self.edss_flow_transforms = [
        self.edss_flow_components, self.edss_flow_constraint_transforms
    ]
def conditional_matrix_exponential(input_dim, context_dim, hidden_dims=None,
                                   iterations=8, normalization='none',
                                   bound=None):
    """Construct a
    :class:`~pyro.distributions.transforms.ConditionalMatrixExponential`
    transform, for consistency with the other helpers.

    :param input_dim: Dimension of input variable
    :type input_dim: int
    :param context_dim: Dimension of context variable
    :type context_dim: int
    :param hidden_dims: Hidden dimensions of the dense network; defaults to
        [input_dim * 10, input_dim * 10]
    :type hidden_dims: list[int]
    :param iterations: number of terms in the truncated power series that
        approximates matrix exponentiation.
    :type iterations: int
    :param normalization: one of `['none', 'weight', 'spectral']`; `weight`
        applies weight normalization (Salimans and Kingma, 2016), `spectral`
        applies spectral normalization (Miyato et al, 2018).
    :type normalization: string
    :param bound: bound on the weight or spectral norm when the corresponding
        normalization is selected; lower values need fewer power-series terms.
    :type bound: float
    """
    if hidden_dims is None:
        hidden_dims = [input_dim * 10, input_dim * 10]
    # The hypernet emits a full input_dim x input_dim matrix, flattened.
    hypernet = DenseNN(context_dim, hidden_dims, param_dims=[input_dim ** 2])
    return ConditionalMatrixExponential(input_dim, hypernet,
                                        iterations=iterations,
                                        normalization=normalization,
                                        bound=bound)
def spline_coupling(input_dim, split_dim=None, hidden_dims=None, count_bins=8,
                    bound=3.0):
    """Construct a :class:`~pyro.distributions.transforms.SplineCoupling`
    transform, for consistency with the other helpers.

    :param input_dim: Dimension of input variable
    :type input_dim: int
    :param split_dim: index at which to split the input; defaults to
        input_dim // 2
    :param hidden_dims: hidden dimensions of the dense network; defaults to
        [input_dim * 10, input_dim * 10]
    :param count_bins: number of spline segments
    :param bound: bounding box half-width of the spline
    """
    if split_dim is None:
        split_dim = input_dim // 2
    if hidden_dims is None:
        hidden_dims = [input_dim * 10, input_dim * 10]
    # The coupling transforms the upper (input_dim - split_dim) coordinates.
    out_dim = input_dim - split_dim
    # Spline parameters: widths, heights, internal derivatives, lambdas.
    param_dims = [
        out_dim * count_bins,
        out_dim * count_bins,
        out_dim * (count_bins - 1),
        out_dim * count_bins,
    ]
    hypernet = DenseNN(split_dim, hidden_dims, param_dims=param_dims)
    return SplineCoupling(input_dim, split_dim, hypernet, count_bins, bound)
def __init__(self, **kwargs): super().__init__(**kwargs) # Flow for modelling t Gamma self.thickness_flow_components = ComposeTransformModule([Spline(1)]) self.thickness_flow_constraint_transforms = ComposeTransform( [self.thickness_flow_lognorm, ExpTransform()]) self.thickness_flow_transforms = ComposeTransform([ self.thickness_flow_components, self.thickness_flow_constraint_transforms ]) # affine flow for s normal intensity_net = DenseNN(1, [1], param_dims=[1, 1], nonlinearity=torch.nn.Identity()) self.intensity_flow_components = ConditionalAffineTransform( context_nn=intensity_net, event_dim=0) self.intensity_flow_constraint_transforms = ComposeTransform( [SigmoidTransform(), self.intensity_flow_norm]) self.intensity_flow_transforms = [ self.intensity_flow_components, self.intensity_flow_constraint_transforms ]
def conditional_affine_coupling2(input_dim, context_dim, hidden_dims=None,
                                 split_dim=None, rich_context_dim=None,
                                 dropout=None, **kwargs):
    """Construct a ConditionalAffineCoupling2 whose hypernet consumes the
    lower split of the input concatenated with a rich context vector.

    :param dropout: when given, use a DropoutDenseNN hypernet with this
        dropout rate; otherwise a plain DenseNN.
    """
    split_dim = input_dim // 2 if split_dim is None else split_dim
    hidden_dims = [10 * input_dim] if hidden_dims is None else hidden_dims
    rich_context_dim = (5 * context_dim if rich_context_dim is None
                        else rich_context_dim)
    # Hypernet outputs loc and log-scale for the upper split.
    out_dims = [input_dim - split_dim, input_dim - split_dim]
    if dropout is None:
        hypernet = DenseNN(split_dim + rich_context_dim, hidden_dims, out_dims)
    else:
        hypernet = DropoutDenseNN(input_dim=split_dim + rich_context_dim,
                                  hidden_dims=hidden_dims,
                                  dropout=dropout,
                                  param_dims=out_dims)
    return ConditionalAffineCoupling2(split_dim, hypernet)
def init_spline_coupling(dim: int, device: str = "cpu", **kwargs):
    """Initialize a spline coupling transform by providing the positional
    args and kwargs needed to construct it.

    Returns ``([dim, split_dim, neural_net], {count_bins, bound, order})``.
    """
    assert dim > 1, "In 1d this would be equivalent to affine flows, use them."
    split_dim = kwargs.get("split_dim", dim // 2)
    hidden_dims = kwargs.pop("hidden_dims", [5 * dim + 30, 5 * dim + 30])
    nonlinearity = kwargs.pop("nonlinearity", nn.ReLU())
    count_bins = kwargs.get("count_bins", 15)
    order = kwargs.get("order", "linear")
    bound = kwargs.get("bound", 10)
    # The hypernet parameterizes the upper (dim - split_dim) coordinates:
    # widths, heights, internal derivatives — plus lambdas for linear order.
    upper = dim - split_dim
    param_dims = [
        upper * count_bins,
        upper * count_bins,
        upper * (count_bins - 1),
    ]
    if order == "linear":
        param_dims.append(upper * count_bins)
    neural_net = DenseNN(split_dim, hidden_dims, param_dims,
                         nonlinearity=nonlinearity).to(device)
    return [dim, split_dim, neural_net], {
        "count_bins": count_bins,
        "bound": bound,
        "order": order,
    }
def affine_coupling(input_dim, hidden_dims=None, split_dim=None, **kwargs):
    """Construct an :class:`~pyro.distributions.transforms.AffineCoupling`
    together with a dense hypernetwork of matching dimensions.

    :param input_dim: Dimension of input variable
    :type input_dim: int
    :param hidden_dims: Hidden dimensions of the dense network; defaults to
        [10*input_dim]
    :type hidden_dims: list[int]
    :param split_dim: Dimension at which to split the input for the coupling;
        defaults to input_dim // 2
    :type split_dim: int
    :param log_scale_min_clip: Minimum value for clipping log(scale)
        (forwarded via **kwargs)
    :type log_scale_min_clip: float
    :param log_scale_max_clip: Maximum value for clipping log(scale)
        (forwarded via **kwargs)
    :type log_scale_max_clip: float
    """
    split_dim = input_dim // 2 if split_dim is None else split_dim
    hidden_dims = [10 * input_dim] if hidden_dims is None else hidden_dims
    # loc and log-scale heads, one value per transformed coordinate.
    upper = input_dim - split_dim
    hypernet = DenseNN(split_dim, hidden_dims, [upper, upper])
    return AffineCoupling(split_dim, hypernet, **kwargs)
def conditional_affine_coupling(input_dim, context_dim, hidden_dims=None,
                                split_dim=None, rich_context_dim=None,
                                context_hidden_dims=None, **kwargs):
    """Construct a ConditionalAffineCoupling plus its two networks: a
    hypernet over [lower split ++ rich context] and a conditioning net that
    embeds the raw context into the rich context space.
    """
    split_dim = input_dim // 2 if split_dim is None else split_dim
    hidden_dims = [10 * input_dim] if hidden_dims is None else hidden_dims
    rich_context_dim = (5 * context_dim if rich_context_dim is None
                        else rich_context_dim)
    context_hidden_dims = ([10 * context_dim] if context_hidden_dims is None
                           else context_hidden_dims)
    upper = input_dim - split_dim
    # Hypernet: loc and log-scale for the upper split.
    hypernet = DenseNN(split_dim + rich_context_dim, hidden_dims,
                       [upper, upper])
    # Conditioning net: raw context -> rich context embedding.
    condinet = DenseNN(context_dim, context_hidden_dims, [rich_context_dim])
    return ConditionalAffineCoupling(split_dim, hypernet, condinet)
def __init__(self, use_affine_ex: bool = True, **kwargs):
    """Build the decoder-side flows: a spline flow for thickness, a linear
    conditional affine flow for intensity, and the image flow.

    :param use_affine_ex: whether _build_image_flow adds the extra
        conditional affine transform on the image (consumed there).
    """
    super().__init__(**kwargs)
    self.use_affine_ex = use_affine_ex
    # decoder parts
    # Flow for modelling t Gamma
    self.thickness_flow_components = ComposeTransformModule([Spline(1)])
    # Map onto the positive support: lognorm rescaling then exp.
    self.thickness_flow_constraint_transforms = ComposeTransform(
        [self.thickness_flow_lognorm, ExpTransform()])
    self.thickness_flow_transforms = ComposeTransform([
        self.thickness_flow_components,
        self.thickness_flow_constraint_transforms
    ])
    # affine flow for s normal
    # Identity nonlinearity: the hypernet is a plain linear map from the
    # single context variable to the two affine parameters.
    intensity_net = DenseNN(1, [1], param_dims=[1, 1],
                            nonlinearity=torch.nn.Identity())
    self.intensity_flow_components = ConditionalAffineTransform(
        context_nn=intensity_net, event_dim=0)
    self.intensity_flow_constraint_transforms = ComposeTransform(
        [SigmoidTransform(), self.intensity_flow_norm])
    self.intensity_flow_transforms = [
        self.intensity_flow_components,
        self.intensity_flow_constraint_transforms
    ]
    # build flow as s_affine_w * t * e_s + b -> depends on t though
    # realnvp or so for x
    self._build_image_flow()
def conditional_householder(input_dim, context_dim, hidden_dims=None,
                            count_transforms=1):
    """Construct a
    :class:`~pyro.distributions.transforms.ConditionalHouseholder` together
    with a dense hypernetwork of matching dimensions.

    :param input_dim: Dimension of input variable
    :type input_dim: int
    :param context_dim: Dimension of context variable
    :type context_dim: int
    :param hidden_dims: Hidden dimensions of the dense network; defaults to
        [input_dim * 10, input_dim * 10]
    :type hidden_dims: list[int]
    :param count_transforms: number of Householder reflections; the hypernet
        emits one input_dim-sized vector per reflection.
    """
    if hidden_dims is None:
        hidden_dims = [input_dim * 10, input_dim * 10]
    hypernet = DenseNN(context_dim, hidden_dims,
                       param_dims=[input_dim] * count_transforms)
    return ConditionalHouseholder(input_dim, hypernet, count_transforms)
def inverted_conditional_planar_flow_factory(flow_depth, problem_dim,
                                             c_net_depth, c_net_h_dim,
                                             context_dim, context_n_h_dim,
                                             context_n_depth,
                                             rich_context_dim,
                                             batchnorm_momentum, cuda,
                                             context_dropout=None):
    """Assemble a conditional flow of inverted planar transforms, optionally
    interleaved with batchnorm layers, plus a conditioning network that maps
    the raw context to a rich context embedding.

    Returns a ConditionalNormalizingFlowWrapper3 (moved to CUDA if requested).
    """
    if cuda:
        base_dist = dist.Normal(
            torch.zeros(problem_dim).cuda(),
            torch.ones(problem_dim).cuda())
    else:
        base_dist = dist.Normal(torch.zeros(problem_dim),
                                torch.ones(problem_dim))
    # We define the transformations.  Note the planar transforms are
    # conditioned on the *rich* context produced by condinet below.
    transforms = [
        inverted_conditional_planar(
            input_dim=problem_dim,
            context_dim=rich_context_dim,
            hidden_dims=[c_net_h_dim for i in range(c_net_depth)],
        ) for i in range(flow_depth)
    ]
    # If we want batchnorm add those in. Then sandwich the steps together to a flow.
    if batchnorm_momentum is None:
        batchnorms = None
        flows = transforms
    else:
        batchnorms = [
            batchnorm(input_dim=problem_dim, momentum=batchnorm_momentum)
            for i in range(flow_depth)
        ]
        # Initialize batchnorm gamma away from zero.
        for bn in batchnorms:
            bn.gamma.data += torch.ones(problem_dim)
        # Interleave as [bn, planar, bn, planar, ...] and drop the leading bn.
        flows = list(itertools.chain(*zip(batchnorms, transforms)))[1:]
    # We define the conditioning network (context -> rich context).
    context_hidden_dims = [context_n_h_dim for i in range(context_n_depth)]
    if context_dropout is None:
        condinet = DenseNN(input_dim=context_dim,
                           hidden_dims=context_hidden_dims,
                           param_dims=[rich_context_dim])
    else:
        condinet = DropoutDenseNN(input_dim=context_dim,
                                  hidden_dims=context_hidden_dims,
                                  param_dims=[rich_context_dim],
                                  dropout=context_dropout)
    # We define the normalizing flow wrapper
    normalizing_flow = ConditionalNormalizingFlowWrapper3(
        transforms, flows, base_dist, condinet, batchnorms)
    if cuda:
        normalizing_flow.cuda()
    return normalizing_flow
def init_affine_coupling(dim: int, device: str = "cpu", **kwargs):
    """Provide the default initial args and kwargs for an affine coupling
    transform.

    Returns ``([split_dim, arn], {"log_scale_min_clip": -3.0})``.

    :param dim: dimension of the variable to transform (must be > 1)
    :param device: device the hypernet is moved to
    """
    assert dim > 1, "In 1d this would be equivalent to affine flows, use them."
    nonlinearity = kwargs.pop("nonlinearity", nn.ReLU())
    split_dim = kwargs.get("split_dim", dim // 2)
    hidden_dims = kwargs.pop("hidden_dims", [5 * dim + 20, 5 * dim + 20])
    # BUG FIX: DenseNN was constructed without param_dims, falling back to its
    # default of [1, 1]; AffineCoupling needs loc/log-scale heads of size
    # (dim - split_dim) each, so anything but dim == 2 produced mismatched
    # output sizes (compare init_spline_coupling, which passes them).
    param_dims = [dim - split_dim, dim - split_dim]
    arn = DenseNN(split_dim, hidden_dims, param_dims,
                  nonlinearity=nonlinearity).to(device)
    return [split_dim, arn], {"log_scale_min_clip": -3.0}
def inverted_conditional_planar(input_dim, context_dim, hidden_dims=None):
    """Construct an InvertedConditionalPlanar transform whose hypernet emits
    the planar parameters (bias scalar, and two input_dim-sized vectors).

    :param hidden_dims: hidden dimensions of the dense network; defaults to
        [input_dim * 10, input_dim * 10]
    """
    if hidden_dims is None:
        hidden_dims = [input_dim * 10, input_dim * 10]
    hypernet = DenseNN(context_dim, hidden_dims,
                       param_dims=[1, input_dim, input_dim])
    return InvertedConditionalPlanar(hypernet)
def conditional_normalizing_flow_factory2(flow_depth, problem_dim, c_net_depth,
                                          c_net_h_dim, context_dim,
                                          context_n_h_dim, context_n_depth,
                                          rich_context_dim, cuda,
                                          coupling_dropout=None,
                                          context_dropout=None):
    """Assemble a conditional coupling flow (couplings alternated with
    permutes) plus a conditioning network, wrapped in
    ConditionalNormalizingFlowWrapper2.
    """
    # We define the base distribution
    if cuda:
        base_dist = dist.Normal(
            torch.zeros(problem_dim).cuda(),
            torch.ones(problem_dim).cuda())
    else:
        base_dist = dist.Normal(torch.zeros(problem_dim),
                                torch.ones(problem_dim))
    # We define the transformations
    transforms = [
        conditional_affine_coupling2(
            input_dim=problem_dim,
            context_dim=context_dim,
            hidden_dims=[
                c_net_h_dim for i in range(c_net_depth)
            ],  # Note array here to create multiple layers in DenseNN
            rich_context_dim=rich_context_dim,
            dropout=coupling_dropout) for i in range(flow_depth)
    ]
    # BUG FIX: the permute dimension was hard-coded to 2 (the original even
    # carried a "need a fix for this" comment); pass problem_dim, matching
    # conditional_normalizing_flow_factory3.  The fixed [1, 0] permutation is
    # only meaningful for 2-dimensional problems, as there.
    perms = [
        permute(input_dim=problem_dim, permutation=torch.tensor([1, 0]))
        for i in range(flow_depth)
    ]
    # We sandwich the AffineCouplings and permutes together. Unelegant hotfix to remove last permute but it works
    flows = list(itertools.chain(*zip(transforms, perms)))[:-1]
    # We define the conditioning network
    context_hidden_dims = [context_n_h_dim for i in range(context_n_depth)]
    if context_dropout is None:
        condinet = DenseNN(input_dim=context_dim,
                           hidden_dims=context_hidden_dims,
                           param_dims=[rich_context_dim])
    else:
        condinet = DropoutDenseNN(input_dim=context_dim,
                                  hidden_dims=context_hidden_dims,
                                  param_dims=[rich_context_dim],
                                  dropout=context_dropout)
    # We define the normalizing flow wrapper
    normalizing_flow = ConditionalNormalizingFlowWrapper2(
        transforms, flows, base_dist, condinet)
    if cuda:
        normalizing_flow.cuda()
    return normalizing_flow
def affine_coupling(input_dim, hidden_dims=None, split_dim=None, dim=-1,
                    **kwargs):
    """Construct an :class:`~pyro.distributions.transforms.AffineCoupling`
    together with a dense hypernetwork of matching dimensions.

    :param input_dim: Dimension(s) of the input variable. When `dim < -1`
        this must be a tuple corresponding to the event shape.
    :type input_dim: int
    :param hidden_dims: Hidden dimensions of the dense network; defaults to
        [10*input_dim]
    :type hidden_dims: list[int]
    :param split_dim: Dimension at which to split the input for the coupling;
        defaults to input_dim // 2
    :type split_dim: int
    :param dim: the tensor dimension on which to split. Must be negative;
        defines the event dim as `abs(dim)`.
    :type dim: int
    :param log_scale_min_clip: Minimum value for clipping log(scale)
        (forwarded via **kwargs)
    :type log_scale_min_clip: float
    :param log_scale_max_clip: Maximum value for clipping log(scale)
        (forwarded via **kwargs)
    :type log_scale_max_clip: float
    """
    if isinstance(input_dim, int):
        event_shape = [input_dim]
        extra_dims = 1
    else:
        # Multi-dimensional events: validate the shape against dim and count
        # the elements trailing the split axis.
        if len(input_dim) != -dim:
            raise ValueError(
                "event shape {} must have same length as event_dim {}".format(
                    input_dim, -dim))
        event_shape = input_dim
        extra_dims = reduce(operator.mul, event_shape[(dim + 1):], 1)
    event_shape = list(event_shape)
    if split_dim is None:
        split_dim = event_shape[dim] // 2
    if hidden_dims is None:
        hidden_dims = [10 * event_shape[dim] * extra_dims]
    # loc and log-scale heads, each covering the non-split part of the event.
    head = (event_shape[dim] - split_dim) * extra_dims
    hypernet = DenseNN(split_dim * extra_dims, hidden_dims, [head, head])
    return AffineCoupling(split_dim, hypernet, dim=dim, **kwargs)
def _build_image_flow(self):
    """Assemble the multi-scale image flow: per scale, a squeeze followed by
    `flows_per_scale` steps of (optional ActNorm, channel permute, transpose,
    conditional affine coupling, transpose back), then a final channel
    permute, a reshape back to image layout, and optionally a global
    conditional affine transform.

    Learnable pieces are registered in self.trans_modules; the full ordered
    transform list (including the non-learnable ops) goes to
    self.x_transforms.
    """
    self.trans_modules = ComposeTransformModule([])
    self.x_transforms = []
    self.x_transforms += [self._get_preprocess_transforms()]
    # c tracks the current channel count; each squeeze quadruples it.
    c = 1
    for _ in range(self.num_scales):
        self.x_transforms.append(SqueezeTransform())
        c *= 4
        for _ in range(self.flows_per_scale):
            if self.use_actnorm:
                actnorm = ActNorm(c)
                self.trans_modules.append(actnorm)
                self.x_transforms.append(actnorm)
            gcp = GeneralizedChannelPermute(channels=c)
            self.trans_modules.append(gcp)
            self.x_transforms.append(gcp)
            # Move channels last so the coupling's conv net sees (H, W, C) —
            # undone by the inverse transpose after the coupling.
            self.x_transforms.append(
                TransposeTransform(torch.tensor((1, 2, 0))))
            ac = ConditionalAffineCoupling(
                c // 2,
                BasicFlowConvNet(c // 2, self.hidden_channels,
                                 (c // 2, c // 2), 2))
            self.trans_modules.append(ac)
            self.x_transforms.append(ac)
            self.x_transforms.append(
                TransposeTransform(torch.tensor((2, 0, 1))))
        gcp = GeneralizedChannelPermute(channels=c)
        self.trans_modules.append(gcp)
        self.x_transforms.append(gcp)
    # Undo the accumulated squeezes: back to a single-channel 32x32 image.
    # NOTE(review): the 32x32 target size is hard-coded here.
    self.x_transforms += [
        ReshapeTransform((4**self.num_scales, 32 // 2**self.num_scales,
                          32 // 2**self.num_scales), (1, 32, 32))
    ]
    if self.use_affine_ex:
        # Extra conditional affine over the whole image (event_dim=3),
        # conditioned on 2 context variables.
        affine_net = DenseNN(2, [16, 16], param_dims=[1, 1])
        affine_trans = ConditionalAffineTransform(context_nn=affine_net,
                                                  event_dim=3)
        self.trans_modules.append(affine_trans)
        self.x_transforms.append(affine_trans)
def conditional_spline(input_dim, context_dim, hidden_dims=None, count_bins=8,
                       bound=3.0, order='linear',
                       nonlinearity=nn.LeakyReLU(0.1)):
    """Construct a ConditionalSpline with a DenseNN hypernet using a
    configurable nonlinearity.

    :param order: one of ['linear', 'quadratic']; the linear order requires
        an extra lambda parameter head.
    """
    if hidden_dims is None:
        hidden_dims = [input_dim * 10, input_dim * 10]
    # Shared heads: widths, heights, internal derivatives.
    param_dims = [
        input_dim * count_bins,
        input_dim * count_bins,
        input_dim * (count_bins - 1),
    ]
    if order == 'linear':
        param_dims.append(input_dim * count_bins)
    elif order != 'quadratic':
        raise ValueError("Keyword argument 'order' must be one of ['linear', 'quadratic'], but '{}' was found!".format(order))
    hypernet = DenseNN(context_dim, hidden_dims, param_dims=param_dims,
                       nonlinearity=nonlinearity)
    return ConditionalSpline(hypernet, input_dim, count_bins, bound=bound,
                             order=order)
def spline_coupling(input_dim, split_dim=None, hidden_dims=None, count_bins=8,
                    bound=3.0, nonlinearity=nn.LeakyReLU(0.1)):
    """Construct a SplineCoupling with a DenseNN hypernet using a
    configurable nonlinearity.
    """
    if split_dim is None:
        split_dim = input_dim // 2
    if hidden_dims is None:
        hidden_dims = [input_dim * 10, input_dim * 10]
    # The coupling transforms the upper (input_dim - split_dim) coordinates:
    # widths, heights, internal derivatives, lambdas.
    upper = input_dim - split_dim
    param_dims = [
        upper * count_bins,
        upper * count_bins,
        upper * (count_bins - 1),
        upper * count_bins,
    ]
    hypernet = DenseNN(split_dim, hidden_dims, param_dims=param_dims,
                       nonlinearity=nonlinearity)
    return SplineCoupling(input_dim, split_dim, hypernet, count_bins, bound)
def __init__(self, **kwargs): super().__init__(**kwargs) # ventricle_volume flow ventricle_volume_net = DenseNN(2, [8, 16], param_dims=[1, 1], nonlinearity=torch.nn.LeakyReLU(.1)) self.ventricle_volume_flow_components = ConditionalAffineTransform( context_nn=ventricle_volume_net, event_dim=0) self.ventricle_volume_flow_transforms = [ self.ventricle_volume_flow_components, self.ventricle_volume_flow_constraint_transforms ] # brain_volume flow brain_volume_net = DenseNN(2, [8, 16], param_dims=[1, 1], nonlinearity=torch.nn.LeakyReLU(.1)) self.brain_volume_flow_components = ConditionalAffineTransform( context_nn=brain_volume_net, event_dim=0) self.brain_volume_flow_transforms = [ self.brain_volume_flow_components, self.brain_volume_flow_constraint_transforms ]
def conditional_generalized_channel_permute(context_dim, channels=3,
                                            hidden_dims=None):
    """Construct a
    :class:`~pyro.distributions.transforms.ConditionalGeneralizedChannelPermute`
    transform, for consistency with the other helpers.

    :param context_dim: Dimension of the conditioning context.
    :param channels: Number of channel dimensions in the input.
    :type channels: int
    :param hidden_dims: hidden dimensions of the dense network; defaults to
        [channels * 10, channels * 10]
    """
    if hidden_dims is None:
        hidden_dims = [channels * 10, channels * 10]
    # The hypernet emits a flattened channels x channels matrix.
    hypernet = DenseNN(context_dim, hidden_dims, param_dims=[channels ** 2])
    return ConditionalGeneralizedChannelPermute(hypernet, channels)
def conditional_normalizing_flow_factory3(flow_depth, problem_dim, c_net_depth,
                                          c_net_h_dim, context_dim,
                                          context_n_h_dim, context_n_depth,
                                          rich_context_dim,
                                          batchnorm_momentum, cuda,
                                          coupling_dropout=None,
                                          context_dropout=None):
    """Assemble a conditional coupling flow with permutes and optional
    batchnorm, plus a conditioning network, wrapped in
    ConditionalNormalizingFlowWrapper3.
    """
    if cuda:
        base_dist = dist.Normal(torch.zeros(problem_dim).cuda(),
                                torch.ones(problem_dim).cuda())
    else:
        base_dist = dist.Normal(torch.zeros(problem_dim),
                                torch.ones(problem_dim))
    # We define the transformations
    transforms = [conditional_affine_coupling2(input_dim=problem_dim,
                                               context_dim=context_dim,
                                               hidden_dims=[c_net_h_dim for i in range(c_net_depth)],  # Note array here to create multiple layers in DenseNN
                                               rich_context_dim=rich_context_dim,
                                               dropout=coupling_dropout)
                  for i in range(flow_depth)]
    # Permutes are needed to be able to transform all dimensions.
    # Note that the transform is fixed here since we only have 2 dimensions. For more dimensions don't fix it and let it be random.
    perms = [permute(input_dim=problem_dim, permutation=torch.tensor([1, 0]))
             for i in range(flow_depth)]
    # If we want batchnorm add those in. Then sandwich the steps together to a flow
    if batchnorm_momentum is None:
        batchnorms = None
        # [coupling, permute, ...] with the trailing permute dropped.
        flows = list(itertools.chain(*zip(transforms, perms)))[:-1]
    else:
        batchnorms = [batchnorm(input_dim=problem_dim,
                                momentum=batchnorm_momentum)
                      for i in range(flow_depth)]
        # Initialize batchnorm gamma away from zero.
        for bn in batchnorms:
            bn.gamma.data += torch.ones(problem_dim)
        # [bn, coupling, permute, ...] with leading bn and trailing permute dropped.
        flows = list(itertools.chain(*zip(batchnorms, transforms, perms)))[1:-1]
    # We define the conditioning network (context -> rich context).
    context_hidden_dims = [context_n_h_dim for i in range(context_n_depth)]
    if context_dropout is None:
        condinet = DenseNN(input_dim=context_dim,
                           hidden_dims=context_hidden_dims,
                           param_dims=[rich_context_dim])
    else:
        condinet = DropoutDenseNN(input_dim=context_dim,
                                  hidden_dims=context_hidden_dims,
                                  param_dims=[rich_context_dim],
                                  dropout=context_dropout)
    # We define the normalizing flow wrapper
    normalizing_flow = ConditionalNormalizingFlowWrapper3(transforms, flows,
                                                          base_dist, condinet,
                                                          batchnorms)
    if cuda:
        normalizing_flow.cuda()
    return normalizing_flow
def __init__(self, flow_type, num_flows, hidden_dim=20, need_permute=False):
    """Build a stack of `num_flows` flow layers of the requested type
    ('IAF', 'BNAF', or 'RNVP') over a hidden_dim-sized variable, plus
    even/odd permutation index buffers (and their inverses) used when
    `need_permute` is set.

    :raises NotImplementedError: for an unknown flow_type.
    """
    super(NormFlow, self).__init__()
    self.need_permute = need_permute
    if flow_type == 'IAF':
        self.flow = nn.ModuleList([
            AffineAutoregressive(AutoRegressiveNN(hidden_dim,
                                                  [2 * hidden_dim]),
                                 stable=True) for _ in range(num_flows)
        ])
    elif flow_type == 'BNAF':
        self.flow = nn.ModuleList([
            BlockAutoregressive(input_dim=hidden_dim)
            for _ in range(num_flows)
        ])
    elif flow_type == 'RNVP':
        split_dim = hidden_dim // 2
        param_dims = [hidden_dim - split_dim, hidden_dim - split_dim]
        # BUG FIX: a single DenseNN hypernet used to be shared by every
        # AffineCoupling in the list, tying their parameters so each flow
        # step applied the identical transform.  Build one hypernet per
        # coupling layer instead.
        self.flow = nn.ModuleList([
            AffineCoupling(split_dim,
                           DenseNN(split_dim, [2 * hidden_dim], param_dims))
            for _ in range(num_flows)
        ])
    else:
        raise NotImplementedError
    # Even-odd interleaving permutations and their inverses, stored as
    # int64 index buffers so they move with the module across devices.
    even = [i for i in range(0, hidden_dim, 2)]
    odd = [i for i in range(1, hidden_dim, 2)]
    undo_eo = [
        i // 2 if i % 2 == 0 else (i // 2 + len(even))
        for i in range(hidden_dim)
    ]
    undo_oe = [(i // 2 + len(odd)) if i % 2 == 0 else i // 2
               for i in range(hidden_dim)]
    self.register_buffer('eo', torch.tensor(even + odd, dtype=torch.int64))
    self.register_buffer('oe', torch.tensor(odd + even, dtype=torch.int64))
    self.register_buffer('undo_eo',
                         torch.tensor(undo_eo, dtype=torch.int64))
    self.register_buffer('undo_oe',
                         torch.tensor(undo_oe, dtype=torch.int64))
def affine_coupling(input_dim, hidden_dims=None, split_dim=None, dim=-1,
                    nonlinearity=nn.LeakyReLU(0.1), **kwargs):
    """Construct an AffineCoupling with a DenseNN hypernet using a
    configurable nonlinearity; supports multi-dimensional events via `dim`.
    """
    if isinstance(input_dim, int):
        event_shape = [input_dim]
        extra_dims = 1
    else:
        # Tuple event shape: must agree with the declared event dim.
        if len(input_dim) != -dim:
            raise ValueError('event shape {} must have same length as event_dim {}'.format(input_dim, -dim))
        event_shape = input_dim
        extra_dims = reduce(operator.mul, event_shape[(dim + 1):], 1)
    event_shape = list(event_shape)
    if split_dim is None:
        split_dim = event_shape[dim] // 2
    if hidden_dims is None:
        hidden_dims = [10 * event_shape[dim] * extra_dims]
    # loc and log-scale heads, each covering the non-split part of the event.
    head = (event_shape[dim] - split_dim) * extra_dims
    hypernet = DenseNN(split_dim * extra_dims, hidden_dims, [head, head],
                       nonlinearity=nonlinearity)
    return AffineCoupling(split_dim, hypernet, dim=dim, **kwargs)
def __init__(self, input_dim=2, split_dim=1, hidden_dim=128, num_layers=1,
             flow_length=10, use_cuda=False):
    """Build a RealNVP-style flow of `flow_length` affine couplings
    alternated with fixed permutations over an isotropic Gaussian base.

    :param input_dim: dimension of the modelled variable
    :param split_dim: coupling split index
    :param hidden_dim: width of each hypernet hidden layer
    :param num_layers: number of hypernet hidden layers
    :param flow_length: number of coupling steps
    :param use_cuda: move everything to CUDA when True
    """
    super(NormalizingFlow, self).__init__()
    self.base_dist = dist.Normal(
        torch.zeros(input_dim),
        torch.ones(input_dim))  # base distribution is Isotropic Gaussian
    self.param_dims = [input_dim - split_dim, input_dim - split_dim]
    # Define series of bijective transformations; each coupling gets its own
    # DenseNN hypernet.
    self.transforms = [
        AffineCoupling(
            split_dim,
            DenseNN(split_dim, [hidden_dim] * num_layers, self.param_dims))
        for _ in range(flow_length)
    ]
    # BUG FIX: the permutation was hard-coded as permute(2, [1, 0]) even
    # though input_dim is a parameter.  Use a cyclic shift over input_dim
    # coordinates — identical to [1, 0] for the default input_dim == 2.
    cyclic_perm = torch.tensor(list(range(1, input_dim)) + [0])
    self.perms = [
        permute(input_dim, cyclic_perm) for _ in range(flow_length)
    ]
    # Concatenate AffineCoupling layers with Permute Layers, dropping the
    # trailing permute.
    self.generative_flows = list(
        itertools.chain(*zip(self.transforms, self.perms))
    )[:-1]  # generative direction (z-->x)
    self.normalizing_flows = self.generative_flows[::
                                                   -1]  # normalizing direction (x-->z)
    self.flow_dist = dist.TransformedDistribution(self.base_dist,
                                                  self.generative_flows)
    self.use_cuda = use_cuda
    if self.use_cuda:
        self.cuda()
        nn.ModuleList(self.transforms).cuda()
        self.base_dist = dist.Normal(
            torch.zeros(input_dim).cuda(),
            torch.ones(input_dim).cuda())
def train_vae(args):
    """Train a VAE with optional normalizing-flow prior and/or variational
    flow, with early stopping on a validation metric; saves the best models
    to disk and returns the reloaded best
    (encoder, decoder, prior_flow, variational_flow, data).

    :param args: configuration namespace; reads (among others) data, z_dim,
        device, nf_prior, nf_vardistr, num_flows_prior, num_flows_vardistr,
        n_samples, num_epoches, std_normal, use_reparam,
        early_stopping_tolerance, metric_name.
    """
    # pdb.set_trace()
    best_metric = -float("inf")
    prior_params = list([])
    varflow_params = list([])
    prior_flow = None
    variational_flow = None
    data = Dataset(args)
    if args.data in ['goodreads', 'big_dataset']:
        args.feature_shape = data.feature_shape
    # Optional flow-based prior: IAF or RealNVP couplings over z.
    if args.nf_prior:
        flows = []
        for i in range(args.num_flows_prior):
            if args.nf_prior == 'IAF':
                one_arn = AutoRegressiveNN(args.z_dim,
                                           [2 * args.z_dim]).to(args.device)
                one_flow = AffineAutoregressive(one_arn)
            elif args.nf_prior == 'RNVP':
                hypernet = DenseNN(
                    input_dim=args.z_dim // 2,
                    hidden_dims=[2 * args.z_dim, 2 * args.z_dim],
                    param_dims=[
                        args.z_dim - args.z_dim // 2,
                        args.z_dim - args.z_dim // 2
                    ]).to(args.device)
                one_flow = AffineCoupling(args.z_dim // 2,
                                          hypernet).to(args.device)
            flows.append(one_flow)
        prior_flow = nn.ModuleList(flows)
        prior_params = list(prior_flow.parameters())
    if args.data == 'mnist':
        encoder = Encoder(args).to(args.device)
    elif args.data in ['goodreads', 'big_dataset']:
        encoder = Encoder_rec(args).to(args.device)
    # Optional variational flow: stack of NeuralAutoregressive transforms.
    if args.nf_vardistr:
        flows = []
        for i in range(args.num_flows_vardistr):
            one_arn = AutoRegressiveNN(args.z_dim, [2 * args.z_dim],
                                       param_dims=[2 * args.z_dim] * 3).to(
                                           args.device)
            one_flows = NeuralAutoregressive(one_arn, hidden_units=256)
            flows.append(one_flows)
        variational_flow = nn.ModuleList(flows)
        varflow_params = list(variational_flow.parameters())
    if args.data == 'mnist':
        decoder = Decoder(args).to(args.device)
    elif args.data in ['goodreads', 'big_dataset']:
        decoder = Decoder_rec(args).to(args.device)
    # Jointly optimize encoder, decoder and any flow parameters.
    params = list(encoder.parameters()) + list(
        decoder.parameters()) + prior_params + varflow_params
    optimizer = torch.optim.Adam(params=params)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=100,
                                                gamma=0.1)
    current_tolerance = 0
    # with torch.autograd.detect_anomaly():
    for ep in tqdm(range(args.num_epoches)):
        # training cycle
        for batch_num, batch_train in enumerate(data.next_train_batch()):
            # Repeat the batch n_samples times along dim 0 for multi-sample
            # ELBO estimation.
            batch_train_repeated = batch_train.repeat(
                *[[args.n_samples] + [1] * (len(batch_train.shape) - 1)])
            mu, sigma = encoder(batch_train_repeated)
            sum_log_sigma = torch.sum(torch.log(sigma), 1)
            sum_log_jacobian = 0.
            # Reparameterized sample z = mu + sigma * eps.
            eps = args.std_normal.sample(mu.shape)
            z = mu + sigma * eps
            if not args.use_reparam:
                z = z.detach()
            # Push z through the variational flow, accumulating log|det J|.
            if variational_flow:
                prev_v = z
                for flow_num in range(args.num_flows_vardistr):
                    u = variational_flow[flow_num](prev_v)
                    sum_log_jacobian += variational_flow[
                        flow_num].log_abs_det_jacobian(prev_v, u)
                    prev_v = u
                z = u
            logits = decoder(z)
            elbo = compute_objective(args=args,
                                     x_logits=logits,
                                     x_true=batch_train_repeated,
                                     sampled_noise=eps,
                                     inf_samples=z,
                                     sum_log_sigma=sum_log_sigma,
                                     prior_flow=prior_flow,
                                     sum_log_jacobian=sum_log_jacobian,
                                     mu=mu,
                                     sigma=sigma)
            # Maximize the ELBO (minimize its negation).
            (-elbo).backward()
            optimizer.step()
            optimizer.zero_grad()
        # scheduler step
        scheduler.step()
        # validation
        with torch.no_grad():
            metric = validate_vae(args=args,
                                  encoder=encoder,
                                  decoder=decoder,
                                  dataset=data,
                                  prior_flow=prior_flow,
                                  variational_flow=variational_flow)
            # NaN check: a NaN never equals itself.
            if (metric != metric).sum():
                print('NAN appeared!')
                raise ValueError
            if metric > best_metric:
                # Improvement: reset patience and checkpoint everything.
                current_tolerance = 0
                best_metric = metric
                if not os.path.exists('./models/{}/'.format(args.data)):
                    os.makedirs('./models/{}/'.format(args.data))
                torch.save(
                    encoder,
                    './models/{}/best_encoder_data_{}_skips_{}_prior_{}_numflows_{}_varflow_{}_numvarflows_{}_samples_{}_zdim_{}_usereparam_{}.pt'
                    .format(args.data, args.data, args.use_skips,
                            args.nf_prior, args.num_flows_prior,
                            args.nf_vardistr, args.num_flows_vardistr,
                            args.n_samples, args.z_dim, args.use_reparam))
                torch.save(
                    decoder,
                    './models/{}/best_decoder_data_{}_skips_{}_prior_{}_numflows_{}_varflow_{}_numvarflows_{}_samples_{}_zdim_{}_usereparam_{}.pt'
                    .format(args.data, args.data, args.use_skips,
                            args.nf_prior, args.num_flows_prior,
                            args.nf_vardistr, args.num_flows_vardistr,
                            args.n_samples, args.z_dim, args.use_reparam))
                if args.nf_prior:
                    torch.save(
                        prior_flow,
                        './models/{}/best_prior_data_{}_skips_{}_prior_{}_numflows_{}_varflow_{}_numvarflows_{}_samples_{}_zdim_{}_usereparam_{}.pt'
                        .format(args.data, args.data, args.use_skips,
                                args.nf_prior, args.num_flows_prior,
                                args.nf_vardistr, args.num_flows_vardistr,
                                args.n_samples, args.z_dim,
                                args.use_reparam))
                if args.nf_vardistr:
                    torch.save(
                        variational_flow,
                        './models/{}/best_varflow_data_{}_skips_{}_prior_{}_numflows_{}_varflow_{}_numvarflows_{}_samples_{}_zdim_{}_usereparam_{}.pt'
                        .format(args.data, args.data, args.use_skips,
                                args.nf_prior, args.num_flows_prior,
                                args.nf_vardistr, args.num_flows_vardistr,
                                args.n_samples, args.z_dim,
                                args.use_reparam))
            else:
                # No improvement: count towards early stopping.
                current_tolerance += 1
                if current_tolerance >= args.early_stopping_tolerance:
                    print(
                        "Early stopping on epoch {} (effectively trained for {} epoches)"
                        .format(ep, ep - args.early_stopping_tolerance))
                    break
            print(
                'Current epoch: {}'.format(ep), '\t',
                'Current validation {}: {}'.format(args.metric_name, metric),
                '\t',
                'Best validation {}: {}'.format(args.metric_name,
                                                best_metric))
    # return best models:
    encoder = torch.load(
        './models/{}/best_encoder_data_{}_skips_{}_prior_{}_numflows_{}_varflow_{}_numvarflows_{}_samples_{}_zdim_{}_usereparam_{}.pt'
        .format(args.data, args.data, args.use_skips, args.nf_prior,
                args.num_flows_prior, args.nf_vardistr,
                args.num_flows_vardistr, args.n_samples, args.z_dim,
                args.use_reparam))
    decoder = torch.load(
        './models/{}/best_decoder_data_{}_skips_{}_prior_{}_numflows_{}_varflow_{}_numvarflows_{}_samples_{}_zdim_{}_usereparam_{}.pt'
        .format(args.data, args.data, args.use_skips, args.nf_prior,
                args.num_flows_prior, args.nf_vardistr,
                args.num_flows_vardistr, args.n_samples, args.z_dim,
                args.use_reparam))
    if args.nf_prior:
        prior_flow = torch.load(
            './models/{}/best_prior_data_{}_skips_{}_prior_{}_numflows_{}_varflow_{}_numvarflows_{}_samples_{}_zdim_{}_usereparam_{}.pt'
            .format(args.data, args.data, args.use_skips, args.nf_prior,
                    args.num_flows_prior, args.nf_vardistr,
                    args.num_flows_vardistr,
                    args.n_samples, args.z_dim, args.use_reparam))
    if args.nf_vardistr:
        variational_flow = torch.load(
            './models/{}/best_varflow_data_{}_skips_{}_prior_{}_numflows_{}_varflow_{}_numvarflows_{}_samples_{}_zdim_{}_usereparam_{}.pt'
            .format(args.data, args.data, args.use_skips, args.nf_prior,
                    args.num_flows_prior, args.nf_vardistr,
                    args.num_flows_vardistr, args.n_samples, args.z_dim,
                    args.use_reparam))
    return encoder, decoder, prior_flow, variational_flow, data
def combi_conditional_normalizing_flow_factory(flow_depth, problem_dim,
                                               c_net_depth, c_net_h_dim,
                                               context_dim, context_n_h_dim,
                                               context_n_depth,
                                               rich_context_dim,
                                               batchnorm_momentum, cuda,
                                               coupling_dropout=None,
                                               context_dropout=None,
                                               planar_first=True):
    """Assemble a conditional normalizing flow that alternates inverted
    conditional planar steps and conditional affine couplings, separated by
    fixed 2-dimensional permutations, with optional batch-norm layers.

    :param flow_depth: total number of planar + coupling steps; must be even
        so the flow contains equally many of each kind.
    :param problem_dim: dimension of the transformed variable.
        NOTE(review): the hard-coded permutation [1, 0] below assumes
        problem_dim == 2 — confirm before reusing with other dimensions.
    :param c_net_depth: number of hidden layers in each coupling/planar net.
    :param c_net_h_dim: hidden width of each coupling/planar net.
    :param context_dim: dimension of the raw conditioning variable.
    :param context_n_h_dim: hidden width of the context-embedding network.
    :param context_n_depth: number of hidden layers in the context network.
    :param rich_context_dim: dimension of the embedded ("rich") context that
        the planar steps (and the couplings, internally) condition on.
    :param batchnorm_momentum: momentum for the interleaved batch-norm
        transforms, or None to disable batch norm entirely.
    :param cuda: if True, place the base distribution and flow on the GPU.
    :param coupling_dropout: optional dropout rate for the coupling nets.
    :param context_dropout: optional dropout rate for the context net;
        selects DropoutDenseNN instead of DenseNN when set.
    :param planar_first: if True each (planar, coupling, permute) triple
        starts with the planar step, otherwise with the coupling.
    :return: a ConditionalNormalizingFlowWrapper3 bundling the transforms,
        the assembled flow, the base distribution, the conditioning network
        and the batch-norm modules (or None).
    """
    assert (
        flow_depth % 2 == 0
    ), "The flow depth must be divisible by 2 to allow both Planar and AC transforms"
    # Standard-normal base distribution, moved to GPU when requested.
    if cuda:
        base_dist = dist.Normal(
            torch.zeros(problem_dim).cuda(),
            torch.ones(problem_dim).cuda())
    else:
        base_dist = dist.Normal(torch.zeros(problem_dim),
                                torch.ones(problem_dim))

    # We define the transformations: half the depth as affine couplings,
    # half as inverted conditional planars.
    affine_couplings = [
        conditional_affine_coupling2(
            input_dim=problem_dim,
            context_dim=context_dim,
            hidden_dims=[c_net_h_dim for i in range(c_net_depth)],
            rich_context_dim=rich_context_dim,
            dropout=coupling_dropout) for i in range(flow_depth // 2)
    ]
    planars = [
        inverted_conditional_planar(
            input_dim=problem_dim,
            context_dim=rich_context_dim,
            hidden_dims=[c_net_h_dim for i in range(c_net_depth)],
        ) for i in range(flow_depth // 2)
    ]
    transforms = affine_couplings + planars
    # Permutes are needed to be able to transform all dimensions.
    # Note that the transform is fixed here since we only have 2 dimensions.
    # For more dimensions let it be random.
    perms = [
        permute(input_dim=problem_dim, permutation=torch.tensor([1, 0]))
        for i in range(flow_depth // 2)
    ]
    # Assemble the flow: interleave (planar, coupling, permute) triples —
    # or (coupling, planar, permute) — and drop the trailing permute.
    if planar_first:
        flows = list(
            itertools.chain(*zip(planars, affine_couplings, perms)))[:-1]
    else:
        flows = list(
            itertools.chain(*zip(affine_couplings, planars, perms)))[:-1]
    # If we want batchnorm, add those in, then sandwich the steps together
    # to a flow.
    if batchnorm_momentum is None:
        batchnorms = None
    else:
        bn_flow = flows[:1]
        batchnorms = []
        for trans in flows[1:]:
            # Insert a batch-norm transform in front of every learned step
            # (coupling or planar) but not in front of permutations.
            if isinstance(trans, ConditionalAffineCoupling2) or isinstance(
                    trans, InvertedConditionalPlanar):
                batchnorms.append(
                    batchnorm(input_dim=problem_dim,
                              momentum=batchnorm_momentum))
                bn_flow.append(batchnorms[-1])
            bn_flow.append(trans)
        flows = bn_flow
        # Shift each batch-norm gamma up by one — presumably to start with a
        # wider initial scale; verify against the batchnorm implementation.
        for bn in batchnorms:
            bn.gamma.data += torch.ones(problem_dim)

    # We define the conditioning network that embeds the raw context into
    # the rich context the flow steps consume.
    context_hidden_dims = [context_n_h_dim for i in range(context_n_depth)]
    if context_dropout is None:
        condinet = DenseNN(input_dim=context_dim,
                           hidden_dims=context_hidden_dims,
                           param_dims=[rich_context_dim])
    else:
        condinet = DropoutDenseNN(input_dim=context_dim,
                                  hidden_dims=context_hidden_dims,
                                  param_dims=[rich_context_dim],
                                  dropout=context_dropout)

    # We define the normalizing flow wrapper.
    normalizing_flow = ConditionalNormalizingFlowWrapper3(
        transforms, flows, base_dist, condinet, batchnorms)
    if cuda:
        normalizing_flow.cuda()
    return normalizing_flow
def _make_cond_planar(self, input_dim, observed_dim):
    """Build a planar flow conditioned on a random observed context vector.

    The hypernetwork maps the observed context to the planar parameters
    (one bias scalar plus two input_dim-sized vectors).
    """
    hidden = input_dim * 10
    param_net = DenseNN(observed_dim, [hidden, hidden],
                        param_dims=[1, input_dim, input_dim])
    context = torch.rand(observed_dim)
    conditional_flow = transforms.ConditionalPlanarFlow(param_net)
    return conditional_flow.condition(context)
def __init__(self, **kwargs):
    """Construct the decoder, the PGM flow transforms and the encoder.

    Hyper-parameters (latent_dim, filter counts, upconv flag, ...) are
    consumed by the superclass from ``kwargs`` and read back here as
    attributes.
    """
    super().__init__(**kwargs)

    aug_latent_dim = self.latent_dim + 3  # latent code + 3 conditioning vars

    # --- decoder parts ---
    self.decoder = Decoder(num_convolutions=self.num_convolutions,
                           filters=self.dec_filters,
                           latent_dim=aug_latent_dim,
                           upconv=self.use_upconv)
    self.decoder_mean = torch.nn.Conv2d(1, 1, 1)
    self.decoder_logstd = torch.nn.Parameter(
        torch.ones([]) * self.logstd_init)
    self.decoder_affine_param_net = nn.Sequential(
        nn.Linear(aug_latent_dim, self.latent_dim), nn.ReLU(),
        nn.Linear(self.latent_dim, self.latent_dim), nn.ReLU(),
        nn.Linear(self.latent_dim, 6))
    # Zero weights + identity-affine bias: the 2x3 affine head starts at
    # the identity transform.
    affine_head = self.decoder_affine_param_net[-1]
    affine_head.weight.data.zero_()
    affine_head.bias.data.copy_(
        torch.tensor([1, 0, 0, 0, 1, 0], dtype=torch.float))

    # --- age flow ---
    self.age_flow_components = ComposeTransformModule([Spline(1)])
    self.age_flow_lognorm = AffineTransform(loc=0., scale=1.)
    self.age_flow_constraint_transforms = ComposeTransform(
        [self.age_flow_lognorm, ExpTransform()])
    self.age_flow_transforms = ComposeTransform(
        [self.age_flow_components, self.age_flow_constraint_transforms])

    # --- ventricle_volume flow ---
    # TODO: decide on how many things to condition on
    vv_context_net = DenseNN(2, [8, 16], param_dims=[1, 1],
                             nonlinearity=torch.nn.Identity())
    self.ventricle_volume_flow_components = ConditionalAffineTransform(
        context_nn=vv_context_net, event_dim=0)
    self.ventricle_volume_flow_lognorm = AffineTransform(loc=0., scale=1.)
    self.ventricle_volume_flow_constraint_transforms = ComposeTransform(
        [self.ventricle_volume_flow_lognorm, ExpTransform()])
    self.ventricle_volume_flow_transforms = [
        self.ventricle_volume_flow_components,
        self.ventricle_volume_flow_constraint_transforms
    ]

    # --- brain_volume flow ---
    # TODO: decide on how many things to condition on
    bv_context_net = DenseNN(2, [8, 16], param_dims=[1, 1],
                             nonlinearity=torch.nn.Identity())
    self.brain_volume_flow_components = ConditionalAffineTransform(
        context_nn=bv_context_net, event_dim=0)
    self.brain_volume_flow_lognorm = AffineTransform(loc=0., scale=1.)
    self.brain_volume_flow_constraint_transforms = ComposeTransform(
        [self.brain_volume_flow_lognorm, ExpTransform()])
    self.brain_volume_flow_transforms = [
        self.brain_volume_flow_components,
        self.brain_volume_flow_constraint_transforms
    ]

    # --- encoder parts ---
    self.encoder = Encoder(num_convolutions=self.num_convolutions,
                           filters=self.enc_filters,
                           latent_dim=self.latent_dim)
    # TODO: do we need to replicate the PGM here to be able to run
    # counterfactuals?
    latent_head = torch.nn.Sequential(
        torch.nn.Linear(aug_latent_dim, self.latent_dim),
        torch.nn.ReLU())
    self.latent_encoder = DeepIndepNormal(latent_head, self.latent_dim,
                                          self.latent_dim)
def _make_affine_coupling(self, input_dim):
    """Build an affine-coupling transform over ``input_dim`` dimensions.

    The first half of the input passes through unchanged and parameterizes,
    via the hypernetwork, the shift and log-scale heads for the second half.
    """
    split_dim = input_dim // 2
    coupled_dim = input_dim - split_dim
    hypernet = DenseNN(split_dim, [10 * input_dim],
                       [coupled_dim, coupled_dim])
    return transforms.AffineCoupling(split_dim, hypernet)