def __init__(self, depth=9):
    self.depth = depth

    train = (3, 32, 32)

    # depth modules of 3 convolutional layers each at feature map depths of 16, 32, 64
    nfms = [2**(stage + 4) for stage in sorted(list(range(3)) * depth)]
    strides = [1 if cur == prev else 2
               for cur, prev in zip(nfms[1:], nfms[:-1])]

    # Now construct the network
    layers = [Conv(**self.conv_params(3, 16))]
    layers.append(self.module_s1(nfms[0], True))

    for nfm, stride in zip(nfms[1:], strides):
        res_module = self.module_s1(nfm) if stride == 1 else self.module_s2(nfm)
        layers.append(res_module)

    layers.append(BatchNorm())
    layers.append(Activation(Rectlin()))
    layers.append(Pooling('all', op='avg'))
    layers.append(Affine(10, init=Kaiming(local=False),
                         batch_norm=True, activation=Softmax()))

    self.layers = layers
    model = Model(layers=layers)
    cost = GeneralizedCost(costfunc=CrossEntropyMulti())
    model.initialize(train, cost=cost)
    self.model = model

def create_network(stage_depth, bottleneck):
    if stage_depth in (0, 18):
        stages = (2, 2, 2, 2)
    elif stage_depth in (1, 34, 50):
        stages = (3, 4, 6, 3)
    elif stage_depth in (2, 68, 101):
        stages = (3, 4, 23, 3)
    elif stage_depth in (3, 102, 152):
        stages = (3, 8, 36, 3)
    elif stage_depth in (4, 98, 138):
        stages = (3, 7, 35, 3)
    else:
        raise ValueError('Invalid stage_depth value {}'.format(stage_depth))

    layers = [Conv(**conv_params(7, 64, strides=2)), Pooling(3, strides=2)]

    # Structure of the deep residual part of the network:
    # stage_depth modules of 2 convolutional layers each at feature map depths
    # of 64, 128, 256, 512
    nfms = list(itt.chain.from_iterable(
        [itt.repeat(2**(x + 6), r) for x, r in enumerate(stages)]))
    strides = [-1] + [1 if cur == prev else 2
                      for cur, prev in zip(nfms[1:], nfms[:-1])]

    for nfm, stride in zip(nfms, strides):
        layers.append(module_factory(nfm, bottleneck, stride))

    layers.append(Pooling('all', op='avg'))
    layers.append(Conv(**conv_params(1, 1000, relu=False)))
    layers.append(Activation(Softmax()))
    return Model(layers=layers), GeneralizedCost(costfunc=CrossEntropyMulti())

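# Worked example (added for clarity): with stage_depth=50 and bottleneck=True,
# stages == (3, 4, 6, 3), so the comprehensions above expand to
#   nfms    == [64]*3 + [128]*4 + [256]*6 + [512]*3
#   strides == [-1, 1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 1, 1, 2, 1, 1]
# The leading -1 flags the very first module, which needs a projection
# shortcut (the bottleneck widens 64 maps to 256) but no spatial downsampling;
# module_factory takes abs(stride) before building the convolutions.
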
def projection_skip(nfm, stride=1):
    mainpath = [Conv(**conv_params(3, nfm, stride=stride)),
                Conv(**conv_params(3, nfm, relu=False))]
    sidepath = [SkipNode() if stride == 1
                else Conv(**conv_params(1, nfm, stride, relu=False))]
    module = [MergeSum([mainpath, sidepath]),
              Activation(Rectlin())]
    return module

def create_network(stage_depth):
    # Structure of the deep residual part of the network:
    # stage_depth modules of 2 convolutional layers each at feature map depths of 16, 32, 64
    nfms = [2**(stage + 4) for stage in sorted(list(range(3)) * stage_depth)]
    strides = [1 if cur == prev else 2
               for cur, prev in zip(nfms[1:], nfms[:-1])]

    # Now construct the network
    layers = [Conv(**conv_params(3, 16))]
    layers.append(module_s1(nfms[0], True))

    for nfm, stride in zip(nfms[1:], strides):
        res_module = module_s1(nfm) if stride == 1 else module_s2(nfm)
        layers.append(res_module)

    layers.append(BatchNorm())
    layers.append(Activation(Rectlin()))
    layers.append(Pooling('all', op='avg'))
    layers.append(Affine(10, init=Kaiming(local=False),
                         batch_norm=True, activation=Softmax()))
    return Model(layers=layers), GeneralizedCost(costfunc=CrossEntropyMulti())

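# Illustrative usage sketch, not part of the original example: training the
# CIFAR-10 residual network returned above. Assumes a neon backend has already
# been generated and that train_set/valid_set are existing data iterators; the
# optimizer settings mirror the schedule used further below in this file.
model, cost = create_network(stage_depth=9)
opt = GradientDescentMomentum(0.1, 0.9, wdecay=0.0001,
                              schedule=Schedule([82, 124], 0.1))
callbacks = Callbacks(model, train_set, eval_set=valid_set)
model.fit(train_set, optimizer=opt, num_epochs=165, cost=cost,
          callbacks=callbacks)
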
def deconv_layer(name, n_feature, ker_size=4, strides=2, padding=1,
                 activation=lrelu, batch_norm=True, bias=None):
    """
    Layer configuration for deep-convolutional (DC) generator

    Arguments:
        name (string): Layer name
        n_feature (int): Number of output feature maps
        ker_size (int): Size of convolutional kernel (defaults to 4)
        strides (int): Stride of convolution (defaults to 2)
        padding (int): Padding of convolution (defaults to 1)
        activation (object): Activation function (defaults to leaky ReLU)
        batch_norm (bool): Enable batch normalization (defaults to True)
        bias (object): Bias initializer (defaults to None, i.e. no bias layer)
    """
    layers = []
    layers.append(Deconvolution(fshape=(ker_size, ker_size, n_feature),
                                strides=strides, padding=padding, dilation={},
                                init=init_w, bsum=batch_norm, name=name))
    if batch_norm:
        layers.append(BatchNorm(name=name + '_bnorm', **bn_prm))
    if bias is not None:
        layers.append(Bias(init=bias, name=name + '_bias'))
    layers.append(Activation(transform=activation, name=name + '_rectlin'))
    return layers

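# Illustrative usage sketch (an assumption, not from the original file):
# stacking deconv_layer blocks into a DCGAN-style generator. init_w, bn_prm
# and lrelu are the module-level globals that deconv_layer relies on; the
# values below are placeholders chosen for the sketch.
from neon.initializers import Gaussian
from neon.transforms import Logistic, Rectlin

init_w = Gaussian(scale=0.02)      # assumed weight initializer
bn_prm = dict(rho=0.9)             # assumed batch-norm decay parameter
lrelu = Rectlin(slope=0.1)         # leaky ReLU via Rectlin's slope argument

generator = []
generator += deconv_layer('gen1', 256)    # upsample to 256 feature maps
generator += deconv_layer('gen2', 128)    # upsample to 128 feature maps
generator += deconv_layer('gen3', 3, activation=Logistic(), batch_norm=False)
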
def module_factory(nfm, stride=1):
    projection = None if stride == 1 else IdentityInit()
    module = [Conv(**conv_params(3, nfm, stride=stride)),
              Conv(**conv_params(3, nfm, relu=False))]
    # args is the module-level argparse namespace: 'plain' keeps the bare
    # convolution stack, anything else wraps it in a residual module
    module = module if args.network == 'plain' else [ResidualModule(module, projection)]
    module.append(Activation(Rectlin()))
    return module

def module_s1(nfm, first=False):
    '''
    non-strided
    '''
    sidepath = Conv(**conv_params(1, nfm * 4, 1, False, False)) if first else SkipNode()
    mainpath = [] if first else [BatchNorm(), Activation(Rectlin())]
    mainpath.append(Conv(**conv_params(1, nfm)))
    mainpath.append(Conv(**conv_params(3, nfm)))
    mainpath.append(Conv(**conv_params(1, nfm * 4, relu=False, batch_norm=False)))
    return MergeSum([sidepath, mainpath])

def module_s2(nfm):
    '''
    strided
    '''
    module = [BatchNorm(), Activation(Rectlin())]
    mainpath = [Conv(**conv_params(1, nfm, stride=2)),
                Conv(**conv_params(3, nfm)),
                Conv(**conv_params(1, nfm * 4, relu=False, batch_norm=False))]
    sidepath = [Conv(**conv_params(1, nfm * 4, stride=2, relu=False, batch_norm=False))]
    module.append(MergeSum([sidepath, mainpath]))
    return module

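# Worked example (added for clarity, assuming the conv_params helper from this
# file): module_s1/module_s2 are pre-activation bottleneck modules, so the
# shared BatchNorm -> Rectlin runs before the branches split. For nfm=16,
# module_s2 downsamples both paths with stride-2 1x1 convolutions and widens
# the output to nfm * 4 = 64 feature maps:
#   BatchNorm -> Rectlin -> MergeSum([1x1/s2 (64) side path,
#                                     1x1/s2 (16) -> 3x3 (16) -> 1x1 (64) main path])
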
def module_factory(nfm, bottleneck=True, stride=1):
    nfm_out = nfm * 4 if bottleneck else nfm
    use_skip = (stride == 1)
    stride = abs(stride)
    sidepath = [SkipNode() if use_skip
                else Conv(**conv_params(1, nfm_out, stride, False))]
    if bottleneck:
        mainpath = [Conv(**conv_params(1, nfm, stride)),
                    Conv(**conv_params(3, nfm)),
                    Conv(**conv_params(1, nfm_out, relu=False))]
    else:
        mainpath = [Conv(**conv_params(3, nfm, stride)),
                    Conv(**conv_params(3, nfm, relu=False))]
    return [MergeSum([mainpath, sidepath]), Activation(Rectlin())]

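# Illustrative sketch (assumption): how module_factory consumes the -1 stride
# that create_network emits for the first residual module. abs(-1) == 1, so
# there is no downsampling, but use_skip is False, which forces a 1x1
# projection on the side path to match the widened bottleneck output
# (nfm * 4 feature maps).
first = module_factory(64, bottleneck=True, stride=-1)  # projection shortcut
later = module_factory(64, bottleneck=True, stride=1)   # identity (SkipNode)
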
def create_network():
    layers = [
        DataTransform(transform=Normalizer(divisor=128.)),

        Conv((11, 11, 96), init=Kaiming(), activation=Rectlin(), strides=4, padding=1),
        Conv((1, 1, 96), init=Kaiming(), activation=Rectlin(), strides=1),
        Conv((3, 3, 96), init=Kaiming(), activation=Rectlin(), strides=2, padding=1),  # 54->27

        Conv((5, 5, 256), init=Kaiming(), activation=Rectlin(), strides=1),  # 27->23
        Conv((1, 1, 256), init=Kaiming(), activation=Rectlin(), strides=1),
        Conv((3, 3, 256), init=Kaiming(), activation=Rectlin(), strides=2, padding=1),  # 23->12

        Conv((3, 3, 384), init=Kaiming(), activation=Rectlin(), strides=1, padding=1),
        Conv((1, 1, 384), init=Kaiming(), activation=Rectlin(), strides=1),
        Conv((3, 3, 384), init=Kaiming(), activation=Rectlin(), strides=2, padding=1),  # 12->6

        Dropout(keep=0.5),
        Conv((3, 3, 1024), init=Kaiming(), activation=Rectlin(), strides=1, padding=1),
        Conv((1, 1, 1024), init=Kaiming(), activation=Rectlin(), strides=1),
        Conv((1, 1, 1000), init=Kaiming(), activation=Rectlin(), strides=1),
        Pooling(6, op='avg'),
        Activation(Softmax())
    ]
    return Model(layers=layers), GeneralizedCost(costfunc=CrossEntropyMulti())

def mlp_layer(name, nout, activation=relu, batch_norm=False, bias=None):
    """
    Layer configuration for MLP generator/discriminator

    Arguments:
        name (string): Layer name
        nout (int): Number of output feature maps
        activation (object): Activation function (defaults to ReLU)
        batch_norm (bool): Enable batch normalization (defaults to False)
        bias (object): Bias initializer (defaults to None, i.e. no bias layer)
    """
    layers = []
    layers.append(Linear(nout=nout, init=init_w, bsum=batch_norm, name=name))
    if batch_norm:
        layers.append(BatchNorm(name=name + '_bnorm', **bn_prm))
    if bias is not None:
        layers.append(Bias(init=bias, name=name + '_bias'))
    layers.append(Activation(transform=activation, name=name + '_rectlin'))
    return layers

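# Illustrative usage sketch (an assumption, not from the original file):
# composing an MLP discriminator from mlp_layer blocks, ending in a logistic
# unit for the real/fake probability. relu, init_w and bn_prm are the
# module-level globals that mlp_layer relies on.
from neon.transforms import Logistic

discriminator = []
discriminator += mlp_layer('dis1', 512)
discriminator += mlp_layer('dis2', 256, batch_norm=True)
discriminator += mlp_layer('dis_out', 1, activation=Logistic())
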
def create_network(stage_depth):
    if stage_depth == 18:
        stages = (2, 2, 2, 2)
    elif stage_depth in (34, 50):
        stages = (3, 4, 6, 3)
    elif stage_depth in (68, 101):
        stages = (3, 4, 23, 3)
    elif stage_depth in (102, 152):
        stages = (3, 8, 36, 3)
    else:
        raise ValueError('Invalid stage_depth value {}'.format(stage_depth))

    bottleneck = False
    if stage_depth in (50, 101, 152):
        bottleneck = True

    layers = [Conv(name='Input Layer', **conv_params(7, 64, strides=2)),
              Pooling(3, strides=2)]

    # Structure of the deep residual part of the network:
    # stage_depth modules of 2 convolutional layers each at feature map depths
    # of 64, 128, 256, 512
    nfms = list(itt.chain.from_iterable(
        [itt.repeat(2**(x + 6), r) for x, r in enumerate(stages)]))
    strides = [-1] + [1 if cur == prev else 2
                      for cur, prev in zip(nfms[1:], nfms[:-1])]

    for nfm, stride in zip(nfms, strides):
        layers.append(module_factory(nfm, bottleneck, stride))

    layers.append(Pooling('all', op='avg', name='end_resnet'))
    layers.append(Conv(name='Custom Head 1', **conv_params(1, 1000, relu=True)))
    layers.append(Dropout(0.5))
    layers.append(Conv(name='Custom Head 2', **conv_params(1, 2, relu=False)))
    layers.append(Activation(Softmax()))

    # Alternative fully connected head:
    # layers.append(Affine(512, init=Kaiming(local=False),
    #                      batch_norm=True, activation=Rectlin()))
    # layers.append(Affine(2, init=Kaiming(local=False), activation=Softmax()))
    return Model(layers=layers)

def __init__(self):
    self.in_shape = (3, 32, 32)
    relu = Rectlin()
    init_use = Constant(0)
    conv = dict(init=init_use, batch_norm=False, activation=relu)
    convp1 = dict(init=init_use, batch_norm=False, bias=init_use,
                  activation=relu, padding=1)
    convp1s2 = dict(init=init_use, batch_norm=False, bias=init_use,
                    padding=1, strides=2)

    layers = [
        Dropout(keep=.8),
        Conv((3, 3, 96), **convp1),
        Conv((3, 3, 96), **convp1),
        Conv((3, 3, 96), **convp1s2),
        Dropout(keep=.5),
        Conv((3, 3, 192), **convp1),
        Conv((3, 3, 192), **convp1),
        Conv((3, 3, 192), **convp1s2),
        Dropout(keep=.5),
        Conv((3, 3, 192), **convp1),
        Conv((1, 1, 192), **conv),
        Conv((1, 1, 16), **conv),
        Pooling(8, op="avg"),
        Activation(Softmax())
    ]
    self.layers = layers
    model = Model(layers=layers)
    cost = GeneralizedCost(costfunc=CrossEntropyMulti())
    model.initialize(self.in_shape, cost=cost)
    self.model = model

layers.append(Conv((5, 5, 256), init=init_uni, activation=relu, strides=1))  # 27->23
layers.append(Conv((1, 1, 256), init=init_uni, activation=relu, strides=1))
layers.append(Conv((3, 3, 256), init=init_uni, activation=relu, strides=2, padding=1))  # 23->12
layers.append(Conv((3, 3, 384), init=init_uni, activation=relu, strides=1, padding=1))
layers.append(Conv((1, 1, 384), init=init_uni, activation=relu, strides=1))
layers.append(Conv((3, 3, 384), init=init_uni, activation=relu, strides=2, padding=1))  # 12->6
layers.append(Dropout(keep=0.5))
layers.append(Conv((3, 3, 1024), init=init_uni, activation=relu, strides=1, padding=1))
layers.append(Conv((1, 1, 1024), init=init_uni, activation=relu, strides=1))
layers.append(Conv((1, 1, 1000), init=init_uni, activation=relu, strides=1))
layers.append(Pooling(6, op='avg'))
layers.append(Activation(Softmax()))

cost = GeneralizedCost(costfunc=CrossEntropyMulti())
mlp = Model(layers=layers)

if args.model_file:
    import os
    assert os.path.exists(args.model_file), '%s not found' % args.model_file
    mlp.load_weights(args.model_file)

# configure callbacks
callbacks = Callbacks(mlp, train, eval_set=test, **args.callback_args)

if args.deconv:
    callbacks.add_deconv_callback(train, test)

# Structure of the deep residual part of the network:
# args.depth modules of 2 convolutional layers each at feature map depths of 16, 32, 64
nfms = [2**(stage + 4) for stage in sorted(list(range(3)) * args.depth)]
strides = [1 if cur == prev else 2 for cur, prev in zip(nfms[1:], nfms[:-1])]

# Now construct the network
layers = [Conv(**conv_params(3, 16))]
layers.append(module_s1(nfms[0], True))

for nfm, stride in zip(nfms[1:], strides):
    res_module = module_s1(nfm) if stride == 1 else module_s2(nfm)
    layers.append(res_module)

layers.append(BatchNorm())
layers.append(Activation(Rectlin()))
layers.append(Pooling('all', op='avg'))
layers.append(Affine(10, init=Kaiming(local=False),
                     batch_norm=True, activation=Softmax()))

model = Model(layers=layers)
opt = GradientDescentMomentum(0.1, 0.9, wdecay=0.0001,
                              schedule=Schedule([82, 124], 0.1))

# configure callbacks
valmetric = Misclassification()

layers = [
    Dropout(keep=.8),
    Conv((3, 3, 96), **convp1),
    Conv((3, 3, 96), **convp1),
    Conv((3, 3, 96), **convp1s2),
    Dropout(keep=.5),
    Conv((3, 3, 192), **convp1),
    Conv((3, 3, 192), **convp1),
    Conv((3, 3, 192), **convp1s2),
    Dropout(keep=.5),
    Conv((3, 3, 192), **convp1),
    Conv((1, 1, 192), **conv),
    Conv((1, 1, 16), **conv),
    Pooling(8, op="avg"),
    Activation(Softmax())
]
cost = GeneralizedCost(costfunc=CrossEntropyMulti())
mlp = Model(layers=layers)

# configure callbacks: replace the defaults with no-ops so training runs
# silently (no progress output, checkpointing, or epoch-end evaluation)
callbacks = Callbacks(mlp)

def do_nothing(_):
    pass

callbacks.callbacks = []
callbacks.on_train_begin = do_nothing
callbacks.on_epoch_end = do_nothing

layers = [
    Dropout(keep=.8),
    Conv((3, 3, 96), **conv),
    Conv((3, 3, 96), **convp1),
    Conv((3, 3, 96), **convp1s2),
    Dropout(keep=.5),
    Conv((3, 3, 192), **convp1),
    Conv((3, 3, 192), **convp1),
    Conv((3, 3, 192), **convp1s2),
    Dropout(keep=.5),
    Conv((3, 3, 192), **conv),
    Conv((1, 1, 192), **conv),
    Conv((1, 1, 16), init=init_uni, activation=relu),
    Pooling(6, op="avg"),
    Activation(Rectlin())
]
cost = GeneralizedCost(costfunc=SumSquared())
mlp = Model(layers=layers)

# configure callbacks
callbacks = Callbacks(mlp, train_set, eval_set=valid_set, **args.callback_args)

mlp.fit(train_set, optimizer=opt_gdm, num_epochs=num_epochs,
        cost=cost, callbacks=callbacks)
print('Misclassification error = %.1f%%' %
      (mlp.eval(valid_set, metric=Misclassification()) * 100))