def __init__(
    self,
    initial_size,
    idims=(32,),
    nonlinearity="softplus",
    squeeze=True,
    init_layer=None,
    n_blocks=1,
    cnf_kwargs={},
):
    strides = tuple([1] + [1 for _ in idims])
    chain = []
    if init_layer is not None:
        chain.append(init_layer)

    def _make_odefunc(size):
        net = ODEnet(idims, size, strides, True, layer_type="concat", nonlinearity=nonlinearity)
        f = layers.ODEfunc(net)
        return f

    if squeeze:
        # A factor-2 squeeze trades spatial size for channels, so the CNF
        # blocks after it operate on (4c, h // 2, w // 2) inputs.
        c, h, w = initial_size
        after_squeeze_size = c * 4, h // 2, w // 2
        pre = [layers.CNF(_make_odefunc(initial_size), **cnf_kwargs) for _ in range(n_blocks)]
        post = [layers.CNF(_make_odefunc(after_squeeze_size), **cnf_kwargs) for _ in range(n_blocks)]
        chain += pre + [layers.SqueezeLayer(2)] + post
    else:
        chain += [layers.CNF(_make_odefunc(initial_size), **cnf_kwargs) for _ in range(n_blocks)]

    super(StackedCNFLayers, self).__init__(chain)
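# A minimal sketch of the shape bookkeeping behind `after_squeeze_size` above,
# not the repo's layers.SqueezeLayer: a factor-2 squeeze moves each spatial
# 2x2 block into channels, so (c, h, w) becomes (4c, h // 2, w // 2). The
# helper name below is illustrative only; torch.nn.functional.pixel_unshuffle
# performs a factor-2 space-to-channel rearrangement as well.
import torch

def squeeze2x2(x):
    """Space-to-channel squeeze with factor 2; assumes even h and w."""
    b, c, h, w = x.shape
    x = x.view(b, c, h // 2, 2, w // 2, 2)
    x = x.permute(0, 1, 3, 5, 2, 4).contiguous()
    return x.view(b, c * 4, h // 2, w // 2)

x = torch.randn(16, 3, 32, 32)
assert squeeze2x2(x).shape == (16, 12, 16, 16)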
# Classification needs a labeled dataset: `n_classes` must have been set
# while loading the data.
if args.task in ['classification', 'hybrid']:
    try:
        n_classes
    except NameError:
        raise ValueError('Cannot perform classification with {}'.format(args.data))
else:
    n_classes = 1

logger.info('Dataset loaded.')
logger.info('Creating model.')

input_size = (args.batchsize, im_dim + args.padding, args.imagesize, args.imagesize)
dataset_size = len(train_loader.dataset)

if args.squeeze_first:
    input_size = (input_size[0], input_size[1] * 4, input_size[2] // 2, input_size[3] // 2)
    squeeze_layer = layers.SqueezeLayer(2)

# Model
model = ResidualFlow(
    input_size,
    n_blocks=list(map(int, args.nblocks.split('-'))),
    intermediate_dim=args.idim,
    factor_out=args.factor_out,
    quadratic=args.quadratic,
    init_layer=init_layer,
    actnorm=args.actnorm,
    fc_actnorm=args.fc_actnorm,
    batchnorm=args.batchnorm,
    dropout=args.dropout,
    fc=args.fc,
    coeff=args.coeff,
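# An illustration of the bookkeeping above, with made-up values: `--nblocks`
# is a dash-separated count per scale, and squeezing first trades spatial
# extent for channels before the flow runs.
nblocks_str = '16-16-16'  # hypothetical value of args.nblocks
assert list(map(int, nblocks_str.split('-'))) == [16, 16, 16]

bs, c, s = 64, 3, 32      # hypothetical batchsize, im_dim + padding, imagesize
input_size_ex = (bs, c, s, s)
squeezed_ex = (input_size_ex[0], input_size_ex[1] * 4, input_size_ex[2] // 2, input_size_ex[3] // 2)
assert squeezed_ex == (64, 12, 16, 16)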
def __init__(
    self,
    initial_size,
    idim,
    squeeze=True,
    init_layer=None,
    n_blocks=1,
    quadratic=False,
    actnorm=False,
    fc_actnorm=False,
    batchnorm=False,
    dropout=0,
    fc=False,
    coeff=0.9,
    vnorms='122f',
    n_lipschitz_iters=None,
    sn_atol=None,
    sn_rtol=None,
    n_power_series=5,
    n_dist='geometric',
    n_samples=1,
    kernels='3-1-3',
    activation_fn='elu',
    fc_end=True,
    fc_nblocks=4,
    fc_idim=128,
    n_exact_terms=0,
    preact=False,
    neumann_grad=True,
    grad_in_forward=False,
    first_resblock=False,
    learn_p=False,
):

    # yapf: disable
    class nonloc_scope: pass
    nonloc_scope.swap = True
    # yapf: enable

    chain = []

    def _actnorm(size, fc):
        if fc:
            return FCWrapper(layers.ActNorm1d(size[0] * size[1] * size[2]))
        else:
            return layers.ActNorm2d(size[0])

    def _quadratic_layer(initial_size, fc):
        if fc:
            c, h, w = initial_size
            dim = c * h * w
            return FCWrapper(layers.InvertibleLinear(dim))
        else:
            return layers.InvertibleConv2d(initial_size[0])

    def _weight_layer(fc):
        return nn.Linear if fc else nn.Conv2d

    def _resblock(initial_size, fc, idim=idim, first_resblock=False):
        if fc:
            nonloc_scope.swap = not nonloc_scope.swap
            return layers.CouplingBlock(
                initial_size[0],
                FCNet(
                    input_shape=initial_size,
                    idim=idim,
                    lipschitz_layer=_weight_layer(True),
                    nhidden=len(kernels.split('-')) - 1,
                    activation_fn=activation_fn,
                    preact=preact,
                    dropout=dropout,
                    coeff=None,
                    domains=None,
                    codomains=None,
                    n_iterations=None,
                    sn_atol=None,
                    sn_rtol=None,
                    learn_p=None,
                    div_in=2,
                ),
                swap=nonloc_scope.swap,
            )
        else:
            ks = list(map(int, kernels.split('-')))

            if init_layer is None:
                _block = layers.ChannelCouplingBlock
                _mask_type = 'channel'
                div_in = 2
                mult_out = 1
            else:
                _block = layers.MaskedCouplingBlock
                _mask_type = 'checkerboard'
                div_in = 1
                mult_out = 2

            nonloc_scope.swap = not nonloc_scope.swap
            _mask_type += '1' if nonloc_scope.swap else '0'

            nnet = []
            if not first_resblock and preact:
                if batchnorm:
                    nnet.append(layers.MovingBatchNorm2d(initial_size[0]))
                nnet.append(ACT_FNS[activation_fn](False))
            nnet.append(_weight_layer(fc)(initial_size[0] // div_in, idim, ks[0], 1, ks[0] // 2))
            if batchnorm:
                nnet.append(layers.MovingBatchNorm2d(idim))
            nnet.append(ACT_FNS[activation_fn](True))
            for i, k in enumerate(ks[1:-1]):
                nnet.append(_weight_layer(fc)(idim, idim, k, 1, k // 2))
                if batchnorm:
                    nnet.append(layers.MovingBatchNorm2d(idim))
                nnet.append(ACT_FNS[activation_fn](True))
            if dropout:
                nnet.append(nn.Dropout2d(dropout, inplace=True))
            nnet.append(_weight_layer(fc)(idim, initial_size[0] * mult_out, ks[-1], 1, ks[-1] // 2))
            if batchnorm:
                nnet.append(layers.MovingBatchNorm2d(initial_size[0]))

            return _block(initial_size[0], nn.Sequential(*nnet), mask_type=_mask_type)

    if init_layer is not None:
        chain.append(init_layer)
    if first_resblock and actnorm:
        chain.append(_actnorm(initial_size, fc))
    if first_resblock and fc_actnorm:
        chain.append(_actnorm(initial_size, True))

    if squeeze:
        c, h, w = initial_size
        for i in range(n_blocks):
            if quadratic:
                chain.append(_quadratic_layer(initial_size, fc))
            chain.append(_resblock(initial_size, fc, first_resblock=first_resblock and (i == 0)))
            if actnorm:
                chain.append(_actnorm(initial_size, fc))
            if fc_actnorm:
                chain.append(_actnorm(initial_size, True))
        chain.append(layers.SqueezeLayer(2))
    else:
        for _ in range(n_blocks):
            if quadratic:
                chain.append(_quadratic_layer(initial_size, fc))
            chain.append(_resblock(initial_size, fc))
            if actnorm:
                chain.append(_actnorm(initial_size, fc))
            if fc_actnorm:
                chain.append(_actnorm(initial_size, True))

        # Use four fully connected layers at the end.
        if fc_end:
            for _ in range(fc_nblocks):
                chain.append(_resblock(initial_size, True, fc_idim))
                if actnorm or fc_actnorm:
                    chain.append(_actnorm(initial_size, True))

    super(StackedCouplingBlocks, self).__init__(chain)
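# The coupling classes referenced above (layers.CouplingBlock,
# layers.ChannelCouplingBlock, layers.MaskedCouplingBlock) live in the repo's
# `layers` package and are not reproduced here. As a rough, illustrative
# sketch of the idea only, assuming an additive channel coupling (the actual
# blocks may be affine and also support checkerboard masks):
import torch
import torch.nn as nn

class AdditiveChannelCoupling(nn.Module):
    """y1 = x1, y2 = x2 + t(x1): trivially invertible, zero log-det."""

    def __init__(self, net, swap=False):
        super().__init__()
        self.net = net    # maps c // 2 channels to c // 2 channels
        self.swap = swap  # alternating `swap` lets both halves get transformed

    def forward(self, x):
        x1, x2 = x.chunk(2, dim=1)
        if self.swap:
            x1, x2 = x2, x1
        return torch.cat([x1, x2 + self.net(x1)], dim=1)

    def inverse(self, y):
        y1, y2 = y.chunk(2, dim=1)
        x1, x2 = y1, y2 - self.net(y1)
        if self.swap:
            x1, x2 = x2, x1
        return torch.cat([x1, x2], dim=1)

# Round-trip check with a toy conv as t(.):
_block = AdditiveChannelCoupling(nn.Conv2d(2, 2, 3, padding=1), swap=True)
_x = torch.randn(8, 4, 16, 16)
assert torch.allclose(_block.inverse(_block(_x)), _x, atol=1e-5)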
def __init__(
    self,
    initial_size,
    idim,
    squeeze=True,
    init_layer=None,
    n_blocks=1,
    quadratic=False,
    actnorm=False,
    fc_actnorm=False,
    batchnorm=False,
    dropout=0,
    fc=False,
    coeff=0.9,
    vnorms='122f',
    n_lipschitz_iters=None,
    sn_atol=None,
    sn_rtol=None,
    n_power_series=5,
    n_dist='geometric',
    n_samples=1,
    kernels='3-1-3',
    activation_fn='elu',
    fc_end=True,
    fc_nblocks=4,
    fc_idim=128,
    n_exact_terms=0,
    preact=False,
    neumann_grad=True,
    grad_in_forward=False,
    first_resblock=False,
    learn_p=False,
):
    chain = []

    # Parse vnorms.
    ps = []
    for p in vnorms:
        if p == 'f':
            ps.append(float('inf'))
        else:
            ps.append(float(p))
    domains, codomains = ps[:-1], ps[1:]
    assert len(domains) == len(kernels.split('-'))

    def _actnorm(size, fc):
        if fc:
            return FCWrapper(layers.ActNorm1d(size[0] * size[1] * size[2]))
        else:
            return layers.ActNorm2d(size[0])

    def _quadratic_layer(initial_size, fc):
        if fc:
            c, h, w = initial_size
            dim = c * h * w
            return FCWrapper(layers.InvertibleLinear(dim))
        else:
            return layers.InvertibleConv2d(initial_size[0])

    def _lipschitz_layer(fc):
        return base_layers.get_linear if fc else base_layers.get_conv2d

    def _resblock(initial_size, fc, idim=idim, first_resblock=False):
        if fc:
            return layers.iResBlock(
                FCNet(
                    input_shape=initial_size,
                    idim=idim,
                    lipschitz_layer=_lipschitz_layer(True),
                    nhidden=len(kernels.split('-')) - 1,
                    coeff=coeff,
                    domains=domains,
                    codomains=codomains,
                    n_iterations=n_lipschitz_iters,
                    activation_fn=activation_fn,
                    preact=preact,
                    dropout=dropout,
                    sn_atol=sn_atol,
                    sn_rtol=sn_rtol,
                    learn_p=learn_p,
                ),
                n_power_series=n_power_series,
                n_dist=n_dist,
                n_samples=n_samples,
                n_exact_terms=n_exact_terms,
                neumann_grad=neumann_grad,
                grad_in_forward=grad_in_forward,
            )
        else:
            ks = list(map(int, kernels.split('-')))
            if learn_p:
                _domains = [nn.Parameter(torch.tensor(0.)) for _ in range(len(ks))]
                _codomains = _domains[1:] + [_domains[0]]
            else:
                _domains = domains
                _codomains = codomains

            nnet = []
            if not first_resblock and preact:
                if batchnorm:
                    nnet.append(layers.MovingBatchNorm2d(initial_size[0]))
                nnet.append(ACT_FNS[activation_fn](False))
            nnet.append(
                _lipschitz_layer(fc)(
                    initial_size[0], idim, ks[0], 1, ks[0] // 2, coeff=coeff, n_iterations=n_lipschitz_iters,
                    domain=_domains[0], codomain=_codomains[0], atol=sn_atol, rtol=sn_rtol
                )
            )
            if batchnorm:
                nnet.append(layers.MovingBatchNorm2d(idim))
            nnet.append(ACT_FNS[activation_fn](True))
            for i, k in enumerate(ks[1:-1]):
                nnet.append(
                    _lipschitz_layer(fc)(
                        idim, idim, k, 1, k // 2, coeff=coeff, n_iterations=n_lipschitz_iters,
                        domain=_domains[i + 1], codomain=_codomains[i + 1], atol=sn_atol, rtol=sn_rtol
                    )
                )
                if batchnorm:
                    nnet.append(layers.MovingBatchNorm2d(idim))
                nnet.append(ACT_FNS[activation_fn](True))
            if dropout:
                nnet.append(nn.Dropout2d(dropout, inplace=True))
            nnet.append(
                _lipschitz_layer(fc)(
                    idim, initial_size[0], ks[-1], 1, ks[-1] // 2, coeff=coeff, n_iterations=n_lipschitz_iters,
                    domain=_domains[-1], codomain=_codomains[-1], atol=sn_atol, rtol=sn_rtol
                )
            )
            if batchnorm:
                nnet.append(layers.MovingBatchNorm2d(initial_size[0]))

            return layers.iResBlock(
                nn.Sequential(*nnet),
                n_power_series=n_power_series,
                n_dist=n_dist,
                n_samples=n_samples,
                n_exact_terms=n_exact_terms,
                neumann_grad=neumann_grad,
                grad_in_forward=grad_in_forward,
            )

    if init_layer is not None:
        chain.append(init_layer)
    if first_resblock and actnorm:
        chain.append(_actnorm(initial_size, fc))
    if first_resblock and fc_actnorm:
        chain.append(_actnorm(initial_size, True))

    if squeeze:
        c, h, w = initial_size
        for i in range(n_blocks):
            if quadratic:
                chain.append(_quadratic_layer(initial_size, fc))
            chain.append(_resblock(initial_size, fc, first_resblock=first_resblock and (i == 0)))
            if actnorm:
                chain.append(_actnorm(initial_size, fc))
            if fc_actnorm:
                chain.append(_actnorm(initial_size, True))
        chain.append(layers.SqueezeLayer(2))
    else:
        for _ in range(n_blocks):
            if quadratic:
                chain.append(_quadratic_layer(initial_size, fc))
            chain.append(_resblock(initial_size, fc))
            if actnorm:
                chain.append(_actnorm(initial_size, fc))
            if fc_actnorm:
                chain.append(_actnorm(initial_size, True))

        # Use four fully connected layers at the end.
        if fc_end:
            for _ in range(fc_nblocks):
                chain.append(_resblock(initial_size, True, fc_idim))
                if actnorm or fc_actnorm:
                    chain.append(_actnorm(initial_size, True))

    super(StackediResBlocks, self).__init__(chain)
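# Worked example of the vnorms parsing above: with the defaults vnorms='122f'
# and kernels='3-1-3', each character is a p-norm ('f' meaning infinity), and
# consecutive pairs become the (domain, codomain) of each Lipschitz layer.
ps = [float('inf') if p == 'f' else float(p) for p in '122f']
domains, codomains = ps[:-1], ps[1:]
assert domains == [1.0, 2.0, 2.0]
assert codomains == [2.0, 2.0, float('inf')]
assert len(domains) == len('3-1-3'.split('-'))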