def set_up_networks(seed=10, dim=2):
    torch.manual_seed(seed)

    # Likelihood flow: models the 2D data conditioned on the dim-dimensional parameters.
    base_dist_lik = StandardNormal(shape=[2])

    num_layers = 5
    transforms = []
    for _ in range(num_layers):
        transforms.append(ReversePermutation(features=2))
        transforms.append(
            MaskedAffineAutoregressiveTransform(features=2,
                                                hidden_features=50,
                                                context_features=dim,
                                                num_blocks=1))

    transform = CompositeTransform(transforms)
    flow_lik = Flow(transform, base_dist_lik)

    # Posterior flow: models the dim-dimensional parameters conditioned on the 2D data.
    base_dist_post = StandardNormal(shape=[dim])
    # base_dist_post = BoxUniform(low=-2*torch.ones(2), high=2*torch.ones(2))

    num_layers = 5
    transforms = []

    num_off_set = 0.0001  # numerical offset, since the prior has open support

    # Affine map from the prior box to (0, 1), followed by an inverse sigmoid (logit) that
    # maps (0, 1) onto the real line, so the MAF layers act on unconstrained values.
    # shift, scale = calc_scale_and_shift(-1, 1)
    transforms.append(PointwiseAffineTransform(shift=0.5, scale=1 / 4.0))
    # transforms.append(PointwiseAffineTransform(shift=shift, scale=scale))
    transforms.append(InvSigmoid.InvSigmoid())

    for _ in range(num_layers):
        transforms.append(ReversePermutation(features=dim))
        transforms.append(
            MaskedAffineAutoregressiveTransform(features=dim,
                                                hidden_features=50,
                                                context_features=2,
                                                num_blocks=1))

    transform = CompositeTransform(transforms)
    flow_post = Flow(transform, base_dist_post)

    return flow_lik, flow_post
def set_up_networks(seed=10, dim=4):
    torch.manual_seed(seed)

    # Likelihood flow: models the 9 summary statistics conditioned on the dim parameters.
    base_dist_lik = StandardNormal(shape=[9])

    num_layers = 5
    transforms = []
    for _ in range(num_layers):  # TODO: add inverse sigmoid function
        transforms.append(ReversePermutation(features=9))
        transforms.append(
            MaskedAffineAutoregressiveTransform(features=9,
                                                hidden_features=10,
                                                context_features=dim,
                                                num_blocks=2))

    transform = CompositeTransform(transforms)
    flow_lik = Flow(transform, base_dist_lik)

    # Posterior flow: models the dim parameters conditioned on the 9 summary statistics.
    base_dist_post = StandardNormal(shape=[dim])

    num_layers = 4
    transforms = []

    num_off_set = 0.0001  # numerical offset, since the prior has open support

    # Affine map from the prior box to (0, 1), then inverse sigmoid (logit) so the MAF
    # layers act on unconstrained parameters.
    shift, scale = calc_scale_and_shift(-5, 2)
    # transforms.append(PointwiseAffineTransform(shift=5 / 7.0, scale=1 / 7.0))
    transforms.append(PointwiseAffineTransform(shift=shift, scale=scale))
    transforms.append(InvSigmoid.InvSigmoid())

    for _ in range(num_layers):
        transforms.append(ReversePermutation(features=dim))
        transforms.append(
            MaskedAffineAutoregressiveTransform(features=dim,
                                                hidden_features=10,
                                                context_features=9,
                                                num_blocks=2))

    transform = CompositeTransform(transforms)
    flow_post = Flow(transform, base_dist_post)

    return flow_lik, flow_post
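# Usage sketch (illustrative; not part of the original code). It shows how the likelihood
# and posterior flows returned above could be trained with one maximum-likelihood step each.
# Assumptions: the nflows imports used above are in scope, and the stand-in tensors below
# replace real simulated parameters theta (inside the prior box handled by
# calc_scale_and_shift(-5, 2)) and summary statistics x.
import torch
from torch import optim

flow_lik, flow_post = set_up_networks(seed=10, dim=4)

theta = -5.0 + 7.0 * torch.rand(256, 4)  # stand-in parameters inside (-5, 2)
x = torch.randn(256, 9)                  # stand-in summary statistics

opt_lik = optim.Adam(flow_lik.parameters(), lr=1e-3)
opt_post = optim.Adam(flow_post.parameters(), lr=1e-3)

# The likelihood flow models x | theta, the posterior flow models theta | x (contexts swapped).
opt_lik.zero_grad()
(-flow_lik.log_prob(inputs=x, context=theta).mean()).backward()
opt_lik.step()

opt_post.zero_grad()
(-flow_post.log_prob(inputs=theta, context=x).mean()).backward()
opt_post.step()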
def make_model(num_layers, num_features, num_hidden_features, device):
    # Alternative (smaller) context encoders, kept for reference:
    # context_encoder = nn.Sequential(
    #     nn.Linear(num_features, 4 * num_features),
    #     nn.ReLU(),
    #     nn.Linear(4 * num_features, 4 * num_features),
    #     nn.ReLU(),
    #     nn.Linear(4 * num_features, 2 * num_features))
    # context_encoder = nn.Sequential(
    #     nn.Linear(num_features, 2 * num_features),
    #     nn.ReLU(),
    #     nn.Linear(2 * num_features, 2 * num_features),
    #     nn.ReLU(),
    #     nn.Linear(2 * num_features, 2 * num_features))
    context_encoder = nn.Sequential(nn.Linear(num_features, 256),
                                    nn.ReLU(),
                                    nn.Linear(256, 256),
                                    nn.ReLU(),
                                    nn.Linear(256, 2 * num_features))

    base_dist = ConditionalDiagonalNormal(shape=[num_features],
                                          context_encoder=context_encoder)
    # base_dist = StandardNormal(shape=[num_features])
    # base_dist = DiagonalNormal(shape=[num_features])

    transforms = []
    for _ in range(num_layers):
        transforms.append(ReversePermutation(features=num_features))
        # UMNN
        # transforms.append(MaskedUMNNAutoregressiveTransform(features=num_features,
        #                                                     hidden_features=num_hidden_features))
        # Conditional MAF
        # transforms.append(MaskedAffineAutoregressiveTransform(features=num_features,
        #                                                       hidden_features=num_hidden_features,
        #                                                       context_features=num_features))
        # Conditional UMNN
        transforms.append(
            MaskedUMNNAutoregressiveTransform(features=num_features,
                                              hidden_features=num_hidden_features,
                                              context_features=num_features))

    transform = CompositeTransform(transforms)

    # Uncomment the line below for float64:
    # flow = Flow(transform, base_dist).double().to(device)
    # float32:
    flow = Flow(transform, base_dist).to(device)
    optimizer = optim.Adam(flow.parameters())
    return flow, optimizer
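# Usage sketch for the conditional variant above (illustrative; not part of the original
# code). Both the ConditionalDiagonalNormal base and the UMNN transforms condition on the
# same context vector, so log_prob must always receive a context. Dimensions below are
# arbitrary stand-ins.
import torch

device = torch.device("cpu")
flow, optimizer = make_model(num_layers=4, num_features=3, num_hidden_features=32, device=device)

x = torch.randn(64, 3)
context = torch.randn(64, 3)  # here the context has the same dimensionality as the features

optimizer.zero_grad()
loss = -flow.log_prob(inputs=x, context=context).mean()  # maximum-likelihood objective
loss.backward()
optimizer.step()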
def toy_flow(args, n_blocks, input_dim, hidden_dim, num_layers):
    base_dist = StandardNormal(shape=[input_dim])

    transforms = []
    for _ in range(num_layers):
        transforms.append(ReversePermutation(features=input_dim))
        transforms.append(
            MaskedAffineAutoregressiveTransform(features=input_dim,
                                                hidden_features=hidden_dim))
    transform = CompositeTransform(transforms)

    flow = Flow(transform, base_dist)
    return flow
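# Usage sketch (illustrative; not part of the original code): fit the unconditional toy
# flow above to samples from a 2D Gaussian by maximum likelihood. The `args` and `n_blocks`
# arguments are unused by toy_flow(), so placeholders are passed.
import torch
from torch import optim

flow = toy_flow(None, n_blocks=None, input_dim=2, hidden_dim=32, num_layers=5)
optimizer = optim.Adam(flow.parameters(), lr=1e-3)

data = torch.randn(1024, 2)  # stand-in training data
for step in range(200):
    optimizer.zero_grad()
    loss = -flow.log_prob(inputs=data).mean()
    loss.backward()
    optimizer.step()

samples = flow.sample(1000)  # draw new samples from the fitted flow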
def set_up_networks(seed=10, dim=2):
    torch.manual_seed(seed)

    base_dist = StandardNormal(shape=[10])

    num_layers = 4
    transforms = []
    for _ in range(num_layers):
        transforms.append(ReversePermutation(features=10))
        transforms.append(MaskedAffineAutoregressiveTransform(features=10,
                                                              hidden_features=40,
                                                              context_features=dim,
                                                              num_blocks=1))

    transform = CompositeTransform(transforms)
    flow_lik = Flow(transform, base_dist)

    base_dist_post = StandardNormal(shape=[dim])

    num_layers = 4
    transforms = []
    for _ in range(num_layers):
        transforms.append(ReversePermutation(features=dim))
        # context_features is the number of features returned by the summary network,
        # since the summarised observation is what conditions the posterior flow.
        transforms.append(MaskedAffineAutoregressiveTransform(features=dim,
                                                              hidden_features=40,
                                                              context_features=5,
                                                              num_blocks=1))

    transform = CompositeTransform(transforms)

    # Add the SummaryNet as an embedding network: the posterior flow then summarises the
    # raw observations before using them as context.
    flow_post = Flow(transform, base_dist_post, embedding_net=SummaryNet())

    return flow_lik, flow_post
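# Note on the embedding net above (illustrative; not part of the original code). With
# Flow(transform, base_dist, embedding_net=SummaryNet()), nflows passes the raw conditioning
# data through SummaryNet before it is used as context by the autoregressive layers.
# SummaryNet itself is defined elsewhere; the hypothetical stub below only illustrates the
# expected interface (raw observations in, 5 summary features out, matching context_features=5).
import torch.nn as nn

class SummaryNetStub(nn.Module):
    def __init__(self, in_features=100, out_features=5):
        super().__init__()
        self.net = nn.Sequential(nn.Linear(in_features, 64), nn.ReLU(),
                                 nn.Linear(64, out_features))

    def forward(self, x):
        return self.net(x)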
def set_up_networks(seed=10, dim=2):
    torch.manual_seed(seed)

    base_dist_lik = StandardNormal(shape=[5])

    num_layers = 5
    transforms = []
    for _ in range(num_layers):
        transforms.append(ReversePermutation(features=5))
        transforms.append(
            MaskedAffineAutoregressiveTransform(features=5,
                                                hidden_features=10,
                                                context_features=dim,
                                                num_blocks=2))

    transform = CompositeTransform(transforms)
    flow_lik = Flow(transform, base_dist_lik)

    base_dist_post = StandardNormal(shape=[dim])

    num_layers = 4
    transforms = []
    for _ in range(num_layers):
        transforms.append(ReversePermutation(features=dim))
        transforms.append(
            MaskedAffineAutoregressiveTransform(features=dim,
                                                hidden_features=10,
                                                context_features=5,
                                                num_blocks=2))

    transform = CompositeTransform(transforms)
    flow_post = Flow(transform, base_dist_post)

    return flow_lik, flow_post
def make_model(num_layers, num_features, num_hidden_features, device):
    base_dist = StandardNormal(shape=[num_features])
    # base_dist = DiagonalNormal(shape=[num_features])

    transforms = []
    for _ in range(num_layers):
        transforms.append(ReversePermutation(features=num_features))
        transforms.append(MaskedUMNNAutoregressiveTransform(features=num_features,
                                                            hidden_features=num_hidden_features))
    transform = CompositeTransform(transforms)

    # Uncomment the line below for float64:
    # flow = Flow(transform, base_dist).double().to(device)
    flow = Flow(transform, base_dist).to(device)
    optimizer = optim.Adam(flow.parameters())
    return flow, optimizer
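# Training sketch (illustrative; not part of the original code) for the unconditional
# UMNN flow returned directly above. MaskedUMNNAutoregressiveTransform evaluates a
# numerically integrated monotone network, so it is noticeably slower than the affine layers.
import torch

device = torch.device("cpu")
flow, optimizer = make_model(num_layers=5, num_features=4, num_hidden_features=64, device=device)

data = torch.randn(512, 4).to(device)  # stand-in training data
for step in range(100):
    optimizer.zero_grad()
    loss = -flow.log_prob(inputs=data).mean()
    loss.backward()
    optimizer.step()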
def __init__(
    self,
    features,
    hidden_features,
    num_layers,
    num_blocks_per_layer,
    use_volume_preserving=False,
    activation=F.relu,
    dropout_probability=0.0,
    batch_norm_within_layers=False,
    batch_norm_between_layers=False,
):
    if use_volume_preserving:
        coupling_constructor = AdditiveCouplingTransform
    else:
        coupling_constructor = AffineCouplingTransform

    mask = torch.ones(features)
    mask[::2] = -1

    def create_resnet(in_features, out_features):
        return nets.ResidualNet(
            in_features,
            out_features,
            hidden_features=hidden_features,
            num_blocks=num_blocks_per_layer,
            activation=activation,
            dropout_probability=dropout_probability,
            use_batch_norm=batch_norm_within_layers,
        )

    layers = []
    for _ in range(num_layers):
        transform = coupling_constructor(mask=mask,
                                         transform_net_create_fn=create_resnet)
        layers.append(transform)
        mask *= -1
        if batch_norm_between_layers:
            layers.append(BatchNorm(features=features))

    super().__init__(
        transform=CompositeTransform(layers),
        distribution=StandardNormal([features]),
    )
def __init__(
    self,
    features,
    hidden_features,
    num_layers,
    num_blocks_per_layer,
    use_residual_blocks=True,
    use_random_masks=False,
    use_random_permutations=False,
    activation=F.relu,
    dropout_probability=0.0,
    batch_norm_within_layers=False,
    batch_norm_between_layers=False,
):
    if use_random_permutations:
        permutation_constructor = RandomPermutation
    else:
        permutation_constructor = ReversePermutation

    layers = []
    for _ in range(num_layers):
        layers.append(permutation_constructor(features))
        layers.append(
            MaskedAffineAutoregressiveTransform(
                features=features,
                hidden_features=hidden_features,
                num_blocks=num_blocks_per_layer,
                use_residual_blocks=use_residual_blocks,
                random_mask=use_random_masks,
                activation=activation,
                dropout_probability=dropout_probability,
                use_batch_norm=batch_norm_within_layers,
            )
        )
        if batch_norm_between_layers:
            layers.append(BatchNorm(features))

    super().__init__(
        transform=CompositeTransform(layers),
        distribution=StandardNormal([features]),
    )
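# Usage sketch (illustrative; not part of the original code). The two constructors above
# mirror nflows' SimpleRealNVP and MaskedAutoregressiveFlow: both build a CompositeTransform
# over a StandardNormal base and pass them to the Flow base class. Assuming the surrounding
# classes subclass Flow (the names RealNVPFlow and MAFFlow below are hypothetical), they can
# be used as ordinary density estimators:
import torch

flow = MAFFlow(features=8, hidden_features=64, num_layers=5, num_blocks_per_layer=2)
x = torch.randn(128, 8)
log_density = flow.log_prob(x)  # shape [128]
samples = flow.sample(256)      # shape [256, 8]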
def get_neural_posterior(model, parameter_dim, observation_dim, simulator):
    # Everything is a Flow, because the parameters need to be normalized based on the prior.
    mean, std = simulator.normalization_parameters
    normalizing_transform = AffineTransform(shift=-mean / std, scale=1 / std)

    if model == "mdn":
        hidden_features = 50
        neural_posterior = MultivariateGaussianMDN(
            features=parameter_dim,
            context_features=observation_dim,
            hidden_features=hidden_features,
            hidden_net=nn.Sequential(
                nn.Linear(observation_dim, hidden_features),
                nn.ReLU(),
                nn.Dropout(p=0.0),
                nn.Linear(hidden_features, hidden_features),
                nn.ReLU(),
                nn.Linear(hidden_features, hidden_features),
                nn.ReLU(),
            ),
            num_components=20,
            custom_initialization=True,
        )

    elif model == "made":
        num_mixture_components = 5
        transform = normalizing_transform
        distribution = MADEMoG(
            features=parameter_dim,
            hidden_features=50,
            context_features=observation_dim,
            num_blocks=2,
            num_mixture_components=num_mixture_components,
            use_residual_blocks=True,
            random_mask=False,
            activation=F.relu,
            dropout_probability=0.0,
            use_batch_norm=False,
            custom_initialization=True,
        )
        neural_posterior = Flow(transform, distribution)

    elif model == "maf":
        transform = CompositeTransform(
            [
                CompositeTransform(
                    [
                        transforms.MaskedAffineAutoregressiveTransform(
                            features=parameter_dim,
                            hidden_features=50,
                            context_features=observation_dim,
                            num_blocks=2,
                            use_residual_blocks=False,
                            random_mask=False,
                            activation=F.tanh,
                            dropout_probability=0.0,
                            use_batch_norm=True,
                        ),
                        transforms.RandomPermutation(features=parameter_dim),
                    ]
                )
                for _ in range(5)
            ]
        )
        transform = CompositeTransform([normalizing_transform, transform])
        distribution = StandardNormal((parameter_dim,))
        neural_posterior = Flow(transform, distribution)

    elif model == "nsf":
        transform = CompositeTransform(
            [
                CompositeTransform(
                    [
                        transforms.PiecewiseRationalQuadraticCouplingTransform(
                            mask=create_alternating_binary_mask(
                                features=parameter_dim, even=(i % 2 == 0)
                            ),
                            transform_net_create_fn=lambda in_features, out_features: nets.ResidualNet(
                                in_features=in_features,
                                out_features=out_features,
                                hidden_features=50,
                                context_features=observation_dim,
                                num_blocks=2,
                                activation=F.relu,
                                dropout_probability=0.0,
                                use_batch_norm=False,
                            ),
                            num_bins=10,
                            tails="linear",
                            tail_bound=3.0,
                            apply_unconditional_transform=False,
                        ),
                        transforms.LULinear(parameter_dim, identity_init=True),
                    ]
                )
                for i in range(5)
            ]
        )
        distribution = StandardNormal((parameter_dim,))
        neural_posterior = Flow(transform, distribution)

    else:
        raise ValueError(f"Unknown model '{model}'.")

    return neural_posterior
def get_neural_likelihood(model, parameter_dim, observation_dim):
    if model == "mdn":
        hidden_features = 50
        neural_likelihood = MultivariateGaussianMDN(
            features=observation_dim,
            context_features=parameter_dim,
            hidden_features=hidden_features,
            hidden_net=nn.Sequential(
                nn.Linear(parameter_dim, hidden_features),
                nn.BatchNorm1d(hidden_features),
                nn.ReLU(),
                nn.Dropout(p=0.0),
                nn.Linear(hidden_features, hidden_features),
                nn.BatchNorm1d(hidden_features),
                nn.ReLU(),
                nn.Linear(hidden_features, hidden_features),
                nn.BatchNorm1d(hidden_features),
                nn.ReLU(),
            ),
            num_components=20,
            custom_initialization=True,
        )

    elif model == "made":
        neural_likelihood = MixtureOfGaussiansMADE(
            features=observation_dim,
            hidden_features=50,
            context_features=parameter_dim,
            num_blocks=4,
            num_mixture_components=10,
            use_residual_blocks=True,
            random_mask=False,
            activation=F.relu,
            use_batch_norm=True,
            dropout_probability=0.0,
            custom_initialization=True,
        )

    elif model == "maf":
        transform = CompositeTransform(
            [
                CompositeTransform(
                    [
                        MaskedAffineAutoregressiveTransform(
                            features=observation_dim,
                            hidden_features=50,
                            context_features=parameter_dim,
                            num_blocks=2,
                            use_residual_blocks=False,
                            random_mask=False,
                            activation=F.tanh,
                            dropout_probability=0.0,
                            use_batch_norm=True,
                        ),
                        RandomPermutation(features=observation_dim),
                    ]
                )
                for _ in range(5)
            ]
        )
        distribution = StandardNormal((observation_dim,))
        neural_likelihood = Flow(transform, distribution)

    elif model == "nsf":
        transform = CompositeTransform(
            [
                CompositeTransform(
                    [
                        PiecewiseRationalQuadraticCouplingTransform(
                            mask=create_alternating_binary_mask(
                                features=observation_dim, even=(i % 2 == 0)
                            ),
                            transform_net_create_fn=lambda in_features, out_features: nets.ResidualNet(
                                in_features=in_features,
                                out_features=out_features,
                                hidden_features=50,
                                context_features=parameter_dim,
                                num_blocks=2,
                                activation=F.relu,
                                dropout_probability=0.0,
                                use_batch_norm=False,
                            ),
                            num_bins=10,
                            tails="linear",
                            tail_bound=3.0,
                            apply_unconditional_transform=False,
                        ),
                        LULinear(observation_dim, identity_init=True),
                    ]
                )
                for i in range(5)
            ]
        )
        distribution = StandardNormal((observation_dim,))
        neural_likelihood = Flow(transform, distribution)

    else:
        raise ValueError(f"Unknown model '{model}'.")

    return neural_likelihood
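# Usage sketch (illustrative; not part of the original code): build the MAF likelihood
# estimator above and evaluate it on stand-in data. Dimensions are arbitrary; with
# model="mdn"/"made"/"nsf" the call is identical, only the architecture changes.
import torch

parameter_dim, observation_dim = 3, 6
neural_likelihood = get_neural_likelihood("maf", parameter_dim, observation_dim)

theta = torch.randn(64, parameter_dim)
x = torch.randn(64, observation_dim)
log_lik = neural_likelihood.log_prob(inputs=x, context=theta)  # shape [64]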
base_dist = ConditionalDiagonalNormal(shape=[num_features],
                                      context_encoder=context_encoder)
# base_dist = DiagonalNormal(shape=[3])

transforms = []
for _ in range(num_layers):
    transforms.append(ReversePermutation(features=num_features))
    # transforms.append(MaskedAffineAutoregressiveTransform(features=num_features,
    #                                                       hidden_features=100))
    transforms.append(
        MaskedAffineAutoregressiveTransform(features=num_features,
                                            hidden_features=80,
                                            context_features=num_features))
    # transforms.append(MaskedUMNNAutoregressiveTransform(features=num_features,
    #                                                     hidden_features=4))
transform = CompositeTransform(transforms)

flow = Flow(transform, base_dist).to(device)
optimizer = optim.Adam(flow.parameters())
print("number of params: ", sum(p.numel() for p in flow.parameters()))


def plot_histo_1D(real_vals, gen_vals, label_real="Physics Data", label_gen="NFlow Model",
                  col2="blue", title="Physics vs NFlow Models", saveloc=None):
    fig, axes = plt.subplots(1, 4, figsize=(4 * 5, 5))
    for INDEX, ax in zip((0, 1, 2, 3), axes):
        ...  # plotting body omitted
def __init__(self, encoder, dim_z, decoder, normalize_latent_loss: bool, flow_arch: str,
             concat_midi_to_z0=False):
    """
    :param encoder: CNN-based encoder; its output might be smaller than dim_z (if MIDI pitch/vel are concatenated)
    :param dim_z: Latent vector dimension, including possibly concatenated MIDI pitch and velocity.
    :param decoder:
    :param normalize_latent_loss:
    :param flow_arch: Full string description of the flow, e.g. 'realnvp_4l200'
        (flow type, number of flow layers, hidden features count, ...)
    :param concat_midi_to_z0: If True, the encoder's mu and log(var) output vectors must be smaller than dim_z,
        so that this model can append MIDI pitch and velocity (see the corresponding mu and log(var)
        handling in the forward() implementation)

    # TODO add more flow params (hidden neural network config: BN, layers, ...)
    """
    super().__init__()
    # No size checks are performed. Encoder and decoder must have been properly designed.
    self.encoder = encoder
    self.dim_z = dim_z
    self.concat_midi_to_z0 = concat_midi_to_z0
    self.decoder = decoder
    self.is_profiled = False
    self.normalize_latent_loss = normalize_latent_loss

    # Latent flow setup
    flow_args = flow_arch.split('_')
    if len(flow_args) < 2:
        raise AssertionError("flow_arch argument must contain at least a flow type and a layers description, "
                             "e.g. 'realnvp_4l200'")
    elif len(flow_args) > 2:
        raise NotImplementedError("Optional flow arch arguments not supported yet")
    self.flow_arch = flow_args[0]
    flow_layers_args = flow_args[1].split('l')
    self.flow_layers_count = int(flow_layers_args[0])
    self.flow_hidden_features = int(flow_layers_args[1])

    if self.flow_arch.lower() == 'maf':
        transforms = []
        for _ in range(self.flow_layers_count):
            transforms.append(ReversePermutation(features=self.dim_z))
            transforms.append(
                MaskedAffineAutoregressiveTransform(features=self.dim_z,
                                                    hidden_features=self.flow_hidden_features))
        self.flow_transform = CompositeTransform(transforms)
    elif self.flow_arch.lower() == 'realnvp':
        flow = SimpleRealNVP(
            features=self.dim_z,
            hidden_features=self.flow_hidden_features,
            num_layers=self.flow_layers_count,
            num_blocks_per_layer=2,  # MAF default count
            batch_norm_within_layers=True,
            batch_norm_between_layers=False  # True would prevent reversibility during training
        )
        # Quick workaround: we only want the transform, not the base distribution,
        # because the base distribution is modelled separately.
        self.flow_transform = flow._transform
    else:
        raise NotImplementedError("Unavailable flow '{}'".format(self.flow_arch))
def __init__(self, architecture, dim_z, idx_helper: PresetIndexesHelper, dropout_p=0.0,
             fast_forward_flow=True, cat_softmax_activation=False):
    """
    :param architecture: The flow is automatically built from an architecture string, e.g. 'realnvp_16l200'
        means 16 RealNVP flow layers with 200 hidden features each. Some options can be given after an
        underscore (e.g. '16l200_bn' adds batch norm). See the implementation for details.
        TODO implement suffix options
    :param dim_z: Size of a z_K latent vector, which is also the output size of this invertible
        normalizing flow.
    :param idx_helper:
    :param dropout_p: TODO implement dropout (in all but the last flow layers)
    :param fast_forward_flow: If True, the flow transform will be built such that it is fast (and
        memory-efficient) in the forward direction (otherwise, it will be fast in the inverse direction).
        Moreover, if batch norm is used between layers, the flow can be trained only in its 'fast'
        direction (which can be forward or inverse depending on this argument).
    """
    super().__init__()
    self.dim_z = dim_z
    self.idx_helper = idx_helper
    self._fast_forward_flow = fast_forward_flow

    arch_args = architecture.split('_')
    if len(arch_args) < 2:
        raise AssertionError("Invalid architecture string argument '{}': it does not contain enough information"
                             .format(architecture))
    elif len(arch_args) == 2:  # no optional args (default)
        self.flow_type = arch_args[0]
        self.num_flow_layers, self.num_flow_hidden_features = arch_args[1].split('l')
        self.num_flow_layers = int(self.num_flow_layers)
        self.num_flow_hidden_features = int(self.num_flow_hidden_features)
        # Default: full BN usage
        self.bn_between_flows = True
        self.bn_within_flows = True
    else:
        raise NotImplementedError("Arch suffix arguments not implemented yet (too many arch args given in '{}')"
                                  .format(architecture))

    # Multi-layer flow definition
    if self.flow_type.lower() == 'realnvp' or self.flow_type.lower() == 'rnvp':
        # RealNVP - custom (without the unused Gaussian base distribution) and no BN on the last layers
        self._forward_flow_transform = CustomRealNVP(self.dim_z,
                                                     self.num_flow_hidden_features,
                                                     self.num_flow_layers,
                                                     num_blocks_per_layer=2,  # MAF default
                                                     batch_norm_between_layers=self.bn_between_flows,
                                                     batch_norm_within_layers=self.bn_within_flows,
                                                     dropout_probability=dropout_p)
    elif self.flow_type.lower() == 'maf':
        transforms = []
        for l in range(self.num_flow_layers):
            transforms.append(ReversePermutation(features=self.dim_z))
            # TODO add batch norm on all flow MLPs but the last 2, and dropout_p
            transforms.append(MaskedAffineAutoregressiveTransform(features=self.dim_z,
                                                                  hidden_features=self.num_flow_hidden_features,
                                                                  use_batch_norm=False,  # TODO (l < num_layers - 2)
                                                                  dropout_probability=0.5  # TODO as param
                                                                  ))
        # The inverse MAF direction should never (and in practice cannot) be used during training:
        #  - much slower than the forward direction (in the nflows implementation)
        #  - very unstable
        #  - needs huge amounts of GPU RAM
        self._forward_flow_transform = CompositeTransform(transforms)  # fast forward direction  TODO rename

    self.activation_layer = PresetActivation(self.idx_helper,
                                             cat_softmax_activation=cat_softmax_activation)
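# Sketch (illustrative; not part of the original code) of how the CompositeTransform built
# in the two constructors above behaves: forward() maps latent samples through every layer
# and returns the per-sample log |det J|, while inverse() runs the layers in reverse order
# (for MAF, the inverse direction is much slower, as noted above).
import torch
from nflows.transforms import (CompositeTransform, ReversePermutation,
                               MaskedAffineAutoregressiveTransform)

dim_z = 16
transforms = []
for _ in range(4):
    transforms.append(ReversePermutation(features=dim_z))
    transforms.append(MaskedAffineAutoregressiveTransform(features=dim_z, hidden_features=200))
flow_transform = CompositeTransform(transforms)

z0 = torch.randn(32, dim_z)             # e.g. samples from an encoder's Gaussian
zK, log_abs_det = flow_transform(z0)    # fast forward direction
z0_rec, _ = flow_transform.inverse(zK)  # slow inverse direction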
def set_up_networks(lower_post_limits, upper_post_limits, dim_post=12, dim_summary_stat=19, seed=10):
    torch.manual_seed(seed)

    # Likelihood flow: models the summary statistics conditioned on the parameters.
    base_dist_lik = StandardNormal(shape=[dim_summary_stat])

    num_layers = 4
    transforms = []
    for _ in range(num_layers):  # TODO: add inverse sigmoid function
        transforms.append(ReversePermutation(features=dim_summary_stat))
        transforms.append(MaskedAffineAutoregressiveTransform(features=dim_summary_stat,
                                                              hidden_features=40,
                                                              context_features=dim_post,
                                                              num_blocks=2))

    transform = CompositeTransform(transforms)
    flow_lik = Flow(transform, base_dist_lik)

    # Posterior flow: models the parameters conditioned on the summary statistics.
    base_dist_post = StandardNormal(shape=[dim_post])
    # base_dist_post = ConditionalDiagonalNormal(shape=[dim_post],
    #                                            context_encoder=nn.Linear(dim_summary_stat, 2 * dim_post))
    # base_dist_post = UniformContext.UniformContext(low=lower_post_limits, high=upper_post_limits,
    #                                                shape=[dim_post])

    num_layers = 4
    transforms = []

    # Define the posterior model on the prior's support: each parameter is affinely mapped
    # from its prior interval to (0, 1) and then pushed through an inverse sigmoid (logit).
    shift_vec = torch.zeros(dim_post)
    scale_vec = torch.zeros(dim_post)
    num_off_set = 0.001  # numerical offset, since the prior has open support

    print("set priors")
    print(upper_post_limits)
    print(lower_post_limits)

    for i in range(dim_post):
        shift_tmp, scale_tmp = calc_scale_and_shift(lower_post_limits[i], upper_post_limits[i])
        shift_vec[i] = shift_tmp
        scale_vec[i] = scale_tmp

    print(shift_vec)
    print(scale_vec)

    # Map the bounded parameters to unconstrained space before the MAF layers.
    transforms.append(PointwiseAffineTransform(shift=shift_vec, scale=scale_vec))
    transforms.append(InvSigmoid.InvSigmoid())

    for _ in range(num_layers):
        transforms.append(ReversePermutation(features=dim_post))
        transforms.append(MaskedAffineAutoregressiveTransform(features=dim_post,
                                                              hidden_features=50,
                                                              context_features=dim_summary_stat,
                                                              num_blocks=2))

    transform = CompositeTransform(transforms)
    flow_post = Flow(transform, base_dist_post)

    return flow_lik, flow_post
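# Sketch of the bounded-prior reparameterisation used in the posterior flows above
# (illustrative; not part of the original code). calc_scale_and_shift is not shown in these
# snippets; the definition below is an assumption inferred from the hard-coded values used
# elsewhere (shift=0.5, scale=1/4 for a (-2, 2) box; shift=5/7, scale=1/7 for (-5, 2)):
# an affine map sending (lower, upper) to (0, 1), after which the inverse sigmoid (logit)
# maps (0, 1) onto the whole real line.
import torch

def calc_scale_and_shift(lower, upper):  # hypothetical reconstruction
    scale = 1.0 / (upper - lower)
    shift = -lower / (upper - lower)
    return shift, scale

shift, scale = calc_scale_and_shift(-5, 2)  # shift = 5/7, scale = 1/7
theta = torch.tensor([-4.0, 0.0, 1.9])      # parameters inside the prior box (-5, 2)
u = theta * scale + shift                   # now in (0, 1), as PointwiseAffineTransform computes
z = torch.log(u) - torch.log(1.0 - u)       # inverse sigmoid: unconstrained values for the MAF layers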