def build_model(self, input_var, forward, dropout):
    net = dict()
    net['input'] = InputLayer((None, 3, None, None), input_var=input_var)
    net['conv1/7x7_s2'] = ConvLayer(net['input'], 64, 7, stride=2, pad=3,
                                    flip_filters=False)
    net['pool1/3x3_s2'] = PoolLayer(net['conv1/7x7_s2'], pool_size=3,
                                    stride=2, ignore_border=False)
    net['pool1/norm1'] = LRNLayer(net['pool1/3x3_s2'], alpha=0.00002, k=1)
    net['conv2/3x3_reduce'] = ConvLayer(net['pool1/norm1'], 64, 1,
                                        flip_filters=False)
    net['conv2/3x3'] = ConvLayer(net['conv2/3x3_reduce'], 192, 3, pad=1,
                                 flip_filters=False)
    net['conv2/norm2'] = LRNLayer(net['conv2/3x3'], alpha=0.00002, k=1)
    net['pool2/3x3_s2'] = PoolLayerDNN(net['conv2/norm2'], pool_size=3,
                                       stride=2)
    net.update(self.build_inception_module('inception_3a',
                                           net['pool2/3x3_s2'],
                                           [32, 64, 96, 128, 16, 32]))
    net.update(self.build_inception_module('inception_3b',
                                           net['inception_3a/output'],
                                           [64, 128, 128, 192, 32, 96]))
    net['pool3/3x3_s2'] = PoolLayerDNN(net['inception_3b/output'],
                                       pool_size=3, stride=2)
    net.update(self.build_inception_module('inception_4a',
                                           net['pool3/3x3_s2'],
                                           [64, 192, 96, 208, 16, 48]))
    net.update(self.build_inception_module('inception_4b',
                                           net['inception_4a/output'],
                                           [64, 160, 112, 224, 24, 64]))
    net.update(self.build_inception_module('inception_4c',
                                           net['inception_4b/output'],
                                           [64, 128, 128, 256, 24, 64]))
    net.update(self.build_inception_module('inception_4d',
                                           net['inception_4c/output'],
                                           [64, 112, 144, 288, 32, 64]))
    net.update(self.build_inception_module('inception_4e',
                                           net['inception_4d/output'],
                                           [128, 256, 160, 320, 32, 128]))
    net['pool4/3x3_s2'] = PoolLayerDNN(net['inception_4e/output'],
                                       pool_size=3, stride=2)
    net.update(self.build_inception_module('inception_5a',
                                           net['pool4/3x3_s2'],
                                           [128, 256, 160, 320, 32, 128]))
    net.update(self.build_inception_module('inception_5b',
                                           net['inception_5a/output'],
                                           [128, 384, 192, 384, 48, 128]))
    net['pool5/7x7_s1'] = GlobalPoolLayer(net['inception_5b/output'])
    if forward:
        # net['fc6'] = DenseLayer(net['pool5/7x7_s1'], num_units=1000)
        net['prob'] = DenseLayer(net['pool5/7x7_s1'], num_units=4,
                                 nonlinearity=softmax)
    else:
        net['dropout1'] = DropoutLayer(net['pool5/7x7_s1'], p=dropout)
        # net['fc6'] = DenseLayer(net['dropout1'], num_units=1000)
        # net['dropout2'] = DropoutLayer(net['fc6'], p=dropout)
        net['prob'] = DenseLayer(net['dropout1'], num_units=4,
                                 nonlinearity=softmax)
    return net
def build_model(batch_size=BATCH_SIZE):
    """Compile net architecture."""
    nonlin = lasagne.nonlinearities.rectify

    # --- input layers ---
    l_in = lasagne.layers.InputLayer(shape=(None, INPUT_SHAPE[0],
                                            INPUT_SHAPE[1], INPUT_SHAPE[2]),
                                     name='Input')
    net = l_in
    nf = 64

    # --- conv layers ---
    net = Conv2DLayer(net, num_filters=nf, filter_size=5, stride=2, pad=2,
                      W=init_conv(gain="relu"), nonlinearity=nonlin)
    net = batch_norm(net, alpha=0.1)
    net = Conv2DLayer(net, num_filters=nf, filter_size=3, stride=1, pad=1,
                      W=init_conv(gain="relu"), nonlinearity=nonlin)
    net = batch_norm(net, alpha=0.1)
    net = MaxPool2DLayer(net, pool_size=2)
    net = DropoutLayer(net, p=0.3)

    net = Conv2DLayer(net, num_filters=2 * nf, filter_size=3, stride=1, pad=1,
                      W=init_conv(gain="relu"), nonlinearity=nonlin)
    net = batch_norm(net, alpha=0.1)
    net = Conv2DLayer(net, num_filters=2 * nf, filter_size=3, stride=1, pad=1,
                      W=init_conv(gain="relu"), nonlinearity=nonlin)
    net = batch_norm(net, alpha=0.1)
    net = MaxPool2DLayer(net, pool_size=2)
    net = DropoutLayer(net, p=0.3)

    net = Conv2DLayer(net, num_filters=4 * nf, filter_size=3, stride=1, pad=1,
                      W=init_conv(gain="relu"), nonlinearity=nonlin)
    net = batch_norm(net, alpha=0.1)
    net = DropoutLayer(net, p=0.3)
    net = Conv2DLayer(net, num_filters=4 * nf, filter_size=3, stride=1, pad=1,
                      W=init_conv(gain="relu"), nonlinearity=nonlin)
    net = batch_norm(net, alpha=0.1)
    net = DropoutLayer(net, p=0.3)
    net = Conv2DLayer(net, num_filters=6 * nf, filter_size=3, stride=1, pad=1,
                      W=init_conv(gain="relu"), nonlinearity=nonlin)
    net = batch_norm(net, alpha=0.1)
    net = DropoutLayer(net, p=0.3)
    net = Conv2DLayer(net, num_filters=6 * nf, filter_size=3, stride=1, pad=1,
                      W=init_conv(gain="relu"), nonlinearity=nonlin)
    net = batch_norm(net, alpha=0.1)
    net = MaxPool2DLayer(net, pool_size=2)
    net = DropoutLayer(net, p=0.3)

    net = Conv2DLayer(net, num_filters=8 * nf, filter_size=3, stride=1, pad=1,
                      W=init_conv(gain="relu"), nonlinearity=nonlin)
    net = batch_norm(net, alpha=0.1)
    net = Conv2DLayer(net, num_filters=8 * nf, filter_size=3, stride=1, pad=1,
                      W=init_conv(gain="relu"), nonlinearity=nonlin)
    net = batch_norm(net, alpha=0.1)
    net = MaxPool2DLayer(net, pool_size=(1, 2))
    net = DropoutLayer(net, p=0.3)

    net = Conv2DLayer(net, num_filters=8 * nf, filter_size=3, stride=1, pad=1,
                      W=init_conv(gain="relu"), nonlinearity=nonlin)
    net = batch_norm(net, alpha=0.1)
    net = Conv2DLayer(net, num_filters=8 * nf, filter_size=3, stride=1, pad=1,
                      W=init_conv(gain="relu"), nonlinearity=nonlin)
    net = batch_norm(net, alpha=0.1)
    net = MaxPool2DLayer(net, pool_size=(1, 2))
    net = DropoutLayer(net, p=0.3)

    net = Conv2DLayer(net, num_filters=8 * nf, filter_size=3, pad=0,
                      W=init_conv(gain="relu"), nonlinearity=nonlin)
    net = batch_norm(net, alpha=0.1)
    net = DropoutLayer(net, p=0.5)
    net = Conv2DLayer(net, num_filters=8 * nf, filter_size=1, pad=0,
                      W=init_conv(gain="relu"), nonlinearity=nonlin)
    net = batch_norm(net, alpha=0.1)
    net = DropoutLayer(net, p=0.5)

    # --- feed forward part ---
    net = Conv2DLayer(net, num_filters=41, filter_size=1,
                      W=init_conv(gain="relu"), nonlinearity=None)
    net = batch_norm(net, alpha=0.1)
    net = GlobalPoolLayer(net)
    net = FlattenLayer(net)
    net = NonlinearityLayer(net, nonlinearity=lasagne.nonlinearities.softmax)
    return net
def build_cnn(input_var=None, n=5):
    # create a residual learning building block with two stacked 3x3
    # conv layers as in the paper
    def residual_block(l, increase_dim=False):
        input_num_filters = l.output_shape[1]
        if increase_dim:
            first_stride = (2, 2)
            out_num_filters = input_num_filters * 2
        else:
            first_stride = (1, 1)
            out_num_filters = input_num_filters

        stack_1 = batch_norm(
            ConvLayer(l, num_filters=out_num_filters, filter_size=(3, 3),
                      stride=first_stride, nonlinearity=tanh, pad='same',
                      W=lasagne.init.HeNormal(gain='relu'),
                      flip_filters=False))
        stack_2 = ConvLayer(stack_1, num_filters=out_num_filters,
                            filter_size=(3, 3), stride=(1, 1),
                            nonlinearity=None, pad='same',
                            W=lasagne.init.HeNormal(gain='relu'),
                            flip_filters=False)

        # add shortcut connections
        if increase_dim:
            # identity shortcut, as option A in paper
            identity = ExpressionLayer(
                l, lambda X: X[:, :, ::2, ::2],
                lambda s: (s[0], s[1], s[2] // 2, s[3] // 2))
            padding = PadLayer(identity, [out_num_filters // 4, 0, 0],
                               batch_ndim=1)
            block = ElemwiseSumLayer([stack_2, padding])
        else:
            block = ElemwiseSumLayer([stack_2, l])
        return block

    # Building the network
    l_in = InputLayer(shape=(None, 3, 32, 32), input_var=input_var)

    # first layer, output is 16 x 32 x 32
    l = batch_norm(
        ConvLayer(l_in, num_filters=16, filter_size=(3, 3), stride=(1, 1),
                  nonlinearity=tanh, pad='same',
                  W=lasagne.init.HeNormal(gain='relu'), flip_filters=False))

    # first stack of residual blocks, output is 16 x 32 x 32
    for _ in range(n):
        l = residual_block(l)

    # second stack of residual blocks, output is 32 x 16 x 16
    l = residual_block(l, increase_dim=True)
    for _ in range(1, n):
        l = residual_block(l)

    # third stack of residual blocks, output is 64 x 8 x 8
    l = residual_block(l, increase_dim=True)
    for _ in range(1, n):
        l = residual_block(l)

    # average pooling
    l = GlobalPoolLayer(l)

    # fully connected layer
    network = DenseLayer(l, num_units=10, W=lasagne.init.HeNormal(),
                         nonlinearity=softmax)
    return network
def build_fcn(input_var, inner_size):
    l_in = InputLayer(shape=(None, 1) + inner_size, input_var=input_var)

    # stage 1
    conv1_1 = batch_norm(
        Conv2DLayer(l_in, num_filters=32, filter_size=(5, 5),
                    nonlinearity=rectify, W=HeNormal(), pad=2))
    conv1_2 = batch_norm(
        Conv2DLayer(conv1_1, num_filters=32, filter_size=(5, 5),
                    nonlinearity=rectify, W=HeNormal(), pad=2))
    conv1_3 = batch_norm(
        Conv2DLayer(conv1_2, num_filters=32, filter_size=(5, 5),
                    nonlinearity=rectify, W=HeNormal(), pad=2))
    pool1 = MaxPool2DLayer(conv1_3, pool_size=(2, 2))

    # stage 2
    conv2_1 = batch_norm(
        Conv2DLayer(pool1, num_filters=64, filter_size=(3, 3),
                    nonlinearity=rectify, W=HeNormal(), pad=1))
    conv2_2 = batch_norm(
        Conv2DLayer(conv2_1, num_filters=64, filter_size=(3, 3),
                    nonlinearity=rectify, W=HeNormal(), pad=1))
    conv2_3 = batch_norm(
        Conv2DLayer(conv2_2, num_filters=64, filter_size=(3, 3),
                    nonlinearity=rectify, W=HeNormal(), pad=1))
    pool2 = MaxPool2DLayer(conv2_3, pool_size=(2, 2))

    # stage 3
    conv3_1 = batch_norm(
        Conv2DLayer(pool2, num_filters=64, filter_size=(3, 3),
                    nonlinearity=rectify, W=HeNormal(), pad=1))
    conv3_2 = batch_norm(
        Conv2DLayer(conv3_1, num_filters=64, filter_size=(3, 3),
                    nonlinearity=rectify, W=HeNormal(), pad=1))
    pool3 = MaxPool2DLayer(conv3_2, pool_size=(2, 2))

    # stage 4
    conv4_1 = batch_norm(
        Conv2DLayer(pool3, num_filters=128, filter_size=(3, 3),
                    nonlinearity=rectify, W=HeNormal(), pad=1))
    conv4_2 = batch_norm(
        Conv2DLayer(conv4_1, num_filters=128, filter_size=(3, 3),
                    nonlinearity=rectify, W=HeNormal(), pad=1))
    pool4 = MaxPool2DLayer(conv4_2, pool_size=(2, 2))

    # top-down stage 0
    up4 = Upscale2DLayer(pool4, (2, 2))
    up4_conv = batch_norm(
        Conv2DLayer(up4, num_filters=2, filter_size=(1, 1),
                    nonlinearity=my_softmax, W=HeNormal()))
    pool3_conv = batch_norm(
        Conv2DLayer(pool3, num_filters=2, filter_size=(1, 1),
                    nonlinearity=my_softmax, W=HeNormal()))
    concat4 = ElemwiseSumLayer([up4_conv, pool3_conv])

    # top-down stage 1
    up3 = Upscale2DLayer(concat4, (2, 2))
    pool2_conv = batch_norm(
        Conv2DLayer(pool2, num_filters=2, filter_size=(1, 1),
                    nonlinearity=my_softmax, W=HeNormal()))
    concat3 = ElemwiseSumLayer([up3, pool2_conv])

    # top-down stage 2
    pool1_conv = batch_norm(
        Conv2DLayer(pool1, num_filters=2, filter_size=(1, 1),
                    nonlinearity=my_softmax, W=HeNormal()))
    up2 = Upscale2DLayer(concat3, (2, 2))
    concat2 = ElemwiseSumLayer([up2, pool1_conv])

    pred = averageLayer(Upscale2DLayer(concat2, (2, 2)))
    sli = SliceLayer(pred, indices=slice(0, 1), axis=1)
    area = GlobalPoolLayer(sli)
    return pred, sli, area
def mean_brightness_net(images):
    logits = GlobalPoolLayer(images)
    return logits
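# A minimal standalone sketch (names here are illustrative, not taken from
# the snippets above) showing what GlobalPoolLayer does in all of these
# models: it averages over every trailing spatial dimension, collapsing an
# (N, C, H, W) feature map into an (N, C) matrix with one value per channel.
import numpy as np
import theano
import theano.tensor as T
import lasagne

x = T.tensor4('x')
l_in = lasagne.layers.InputLayer(shape=(None, 3, 8, 8), input_var=x)
l_pool = lasagne.layers.GlobalPoolLayer(l_in, pool_function=T.mean)
f = theano.function([x], lasagne.layers.get_output(l_pool))
print(f(np.ones((2, 3, 8, 8), dtype='float32')).shape)  # -> (2, 3)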
def build_fcn(input_var, inner_size):
    l_in = InputLayer(shape=(None, 1) + inner_size, input_var=input_var)

    # stage 1
    conv1_1 = batch_norm(
        Conv2DLayer(l_in, num_filters=32, filter_size=(3, 3),
                    nonlinearity=rectify, W=HeNormal(), pad=1))
    conv1_2 = batch_norm(
        Conv2DLayer(conv1_1, num_filters=32, filter_size=(3, 3),
                    nonlinearity=rectify, W=HeNormal(), pad=1))
    conv1_3 = batch_norm(
        Conv2DLayer(conv1_2, num_filters=32, filter_size=(3, 3),
                    nonlinearity=rectify, W=HeNormal(), pad=1))
    pool1 = MaxPool2DLayer(conv1_3, pool_size=(2, 2))

    # stage 2
    conv2_1 = batch_norm(
        Conv2DLayer(pool1, num_filters=32, filter_size=(3, 3),
                    nonlinearity=rectify, W=HeNormal(), pad=1))
    conv2_2 = batch_norm(
        Conv2DLayer(conv2_1, num_filters=32, filter_size=(3, 3),
                    nonlinearity=rectify, W=HeNormal(), pad=1))
    conv2_3 = batch_norm(
        Conv2DLayer(conv2_2, num_filters=32, filter_size=(3, 3),
                    nonlinearity=rectify, W=HeNormal(), pad=1))
    pool2 = MaxPool2DLayer(conv2_3, pool_size=(2, 2))

    # stage 3
    conv3_1 = batch_norm(
        Conv2DLayer(pool2, num_filters=64, filter_size=(3, 3),
                    nonlinearity=rectify, W=HeNormal(), pad=1))
    conv3_2 = batch_norm(
        Conv2DLayer(conv3_1, num_filters=64, filter_size=(3, 3),
                    nonlinearity=rectify, W=HeNormal(), pad=1))
    pool3 = MaxPool2DLayer(conv3_2, pool_size=(2, 2))

    # stage 4
    conv4_1 = batch_norm(
        Conv2DLayer(pool3, num_filters=64, filter_size=(3, 3),
                    nonlinearity=rectify, W=HeNormal(), pad=1))
    conv4_2 = batch_norm(
        Conv2DLayer(conv4_1, num_filters=64, filter_size=(3, 3),
                    nonlinearity=rectify, W=HeNormal(), pad=1))

    # top-down stage 0
    l4_conv = batch_norm(
        Conv2DLayer(conv4_2, num_filters=4, filter_size=(1, 1),
                    nonlinearity=rectify, W=HeNormal()))
    up4 = Upscale2DLayer(l4_conv, (8, 8))
    l3_conv = batch_norm(
        Conv2DLayer(conv3_2, num_filters=4, filter_size=(1, 1),
                    nonlinearity=rectify, W=HeNormal()))
    up3 = Upscale2DLayer(l3_conv, (4, 4))
    l2_conv = batch_norm(
        Conv2DLayer(conv2_3, num_filters=4, filter_size=(1, 1),
                    nonlinearity=rectify, W=HeNormal()))
    up2 = Upscale2DLayer(l2_conv, (2, 2))
    l1_conv = batch_norm(
        Conv2DLayer(conv1_3, num_filters=4, filter_size=(1, 1),
                    nonlinearity=rectify, W=HeNormal()))
    concat = ConcatLayer([up4, up3, up2, l1_conv])

    pred = Conv2DLayer(concat, num_filters=2, filter_size=(3, 3),
                       nonlinearity=my_softmax, W=HeNormal(), pad=1)
    concat2 = ConcatLayer([concat, pred])
    area1 = Conv2DLayer(concat2, num_filters=8, filter_size=(3, 3),
                        nonlinearity=rectify, W=HeNormal(), pad=1)
    mid = Conv2DLayer(area1, num_filters=1, filter_size=(1, 1), W=HeNormal())
    area = GlobalPoolLayer(mid)
    return pred, mid, area
def build_google(input_var):
    net = {}
    net['input'] = InputLayer((None, 3, 224, 224), input_var)
    net['conv1/7x7_s2'] = ConvLayer(net['input'], 64, 7, stride=2, pad=3,
                                    flip_filters=False)
    net['pool1/3x3_s2'] = PoolLayer(net['conv1/7x7_s2'], pool_size=3,
                                    stride=2, ignore_border=False)
    net['pool1/norm1'] = LRNLayer(net['pool1/3x3_s2'], alpha=0.00002, k=1)
    net['conv2/3x3_reduce'] = ConvLayer(net['pool1/norm1'], 64, 1,
                                        flip_filters=False)
    net['conv2/3x3'] = ConvLayer(net['conv2/3x3_reduce'], 192, 3, pad=1,
                                 flip_filters=False)
    net['conv2/norm2'] = LRNLayer(net['conv2/3x3'], alpha=0.00002, k=1)
    net['pool2/3x3_s2'] = PoolLayer(net['conv2/norm2'], pool_size=3,
                                    stride=2, ignore_border=False)
    net.update(build_inception_module('inception_3a', net['pool2/3x3_s2'],
                                      [32, 64, 96, 128, 16, 32]))
    net.update(build_inception_module('inception_3b',
                                      net['inception_3a/output'],
                                      [64, 128, 128, 192, 32, 96]))
    net['pool3/3x3_s2'] = PoolLayer(net['inception_3b/output'], pool_size=3,
                                    stride=2, ignore_border=False)
    net.update(build_inception_module('inception_4a', net['pool3/3x3_s2'],
                                      [64, 192, 96, 208, 16, 48]))
    net.update(build_inception_module('inception_4b',
                                      net['inception_4a/output'],
                                      [64, 160, 112, 224, 24, 64]))
    net.update(build_inception_module('inception_4c',
                                      net['inception_4b/output'],
                                      [64, 128, 128, 256, 24, 64]))
    net.update(build_inception_module('inception_4d',
                                      net['inception_4c/output'],
                                      [64, 112, 144, 288, 32, 64]))
    net.update(build_inception_module('inception_4e',
                                      net['inception_4d/output'],
                                      [128, 256, 160, 320, 32, 128]))
    net['pool4/3x3_s2'] = PoolLayer(net['inception_4e/output'], pool_size=3,
                                    stride=2, ignore_border=False)
    net.update(build_inception_module('inception_5a', net['pool4/3x3_s2'],
                                      [128, 256, 160, 320, 32, 128]))
    net.update(build_inception_module('inception_5b',
                                      net['inception_5a/output'],
                                      [128, 384, 192, 384, 48, 128]))
    net['pool5/7x7_s1'] = GlobalPoolLayer(net['inception_5b/output'])
    net['loss3/classifier'] = DenseLayer(net['pool5/7x7_s1'], num_units=1000,
                                         nonlinearity=linear)
    net['prob'] = NonlinearityLayer(net['loss3/classifier'],
                                    nonlinearity=softmax)
    return net
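# The GoogLeNet builders in this collection call a build_inception_module
# helper that is not shown here. Below is a plausible sketch modeled on the
# Lasagne Recipes GoogLeNet; the filter-count order
# (pool_proj, 1x1, 3x3_reduce, 3x3, 5x5_reduce, 5x5) is an assumption
# inferred from the call sites, not confirmed by the snippets themselves.
def build_inception_module(name, input_layer, nfilters):
    # nfilters: (pool_proj, 1x1, 3x3_reduce, 3x3, 5x5_reduce, 5x5)
    net = {}
    # padded pooling requires ignore_border=True in the non-DNN pool layer;
    # pool_size=3, stride=1, pad=1 keeps the spatial size unchanged
    net['pool'] = PoolLayer(input_layer, pool_size=3, stride=1, pad=1,
                            ignore_border=True)
    net['pool_proj'] = ConvLayer(net['pool'], nfilters[0], 1,
                                 flip_filters=False)
    net['1x1'] = ConvLayer(input_layer, nfilters[1], 1, flip_filters=False)
    net['3x3_reduce'] = ConvLayer(input_layer, nfilters[2], 1,
                                  flip_filters=False)
    net['3x3'] = ConvLayer(net['3x3_reduce'], nfilters[3], 3, pad=1,
                           flip_filters=False)
    net['5x5_reduce'] = ConvLayer(input_layer, nfilters[4], 1,
                                  flip_filters=False)
    net['5x5'] = ConvLayer(net['5x5_reduce'], nfilters[5], 5, pad=2,
                           flip_filters=False)
    # concatenate the four branches along the channel axis
    net['output'] = ConcatLayer([net['1x1'], net['3x3'], net['5x5'],
                                 net['pool_proj']])
    return {'{}/{}'.format(name, k): v for k, v in net.items()}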
def ResNet_FullPre_Wide(input_var=None, nout=10, n=3, k=2, dropoutrate=0):
    def gelu(x):
        return 0.5 * x * (1 + T.tanh(T.sqrt(2 / np.pi) *
                                     (x + 0.044715 * T.pow(x, 3))))

    f = gelu
    '''
    Adapted from https://gist.github.com/FlorianMuellerklein/3d9ba175038a3f2e7de3794fa303f1ee
    which was tweaked to be consistent with 'Identity Mappings in Deep
    Residual Networks', Kaiming He et al. 2016
    (https://arxiv.org/abs/1603.05027)
    and 'Wide Residual Networks', Sergey Zagoruyko, Nikos Komodakis 2016
    (http://arxiv.org/pdf/1605.07146v1.pdf)
    '''
    n_filters = {0: 16, 1: 16 * k, 2: 32 * k, 3: 64 * k}

    # create a residual learning building block with two stacked 3x3
    # conv layers and dropout
    def residual_block(l, first=False, increase_dim=False, filters=16):
        if increase_dim:
            first_stride = (2, 2)
        else:
            first_stride = (1, 1)

        conv_1 = ConvLayer(l, num_filters=filters, filter_size=(3, 3),
                           stride=first_stride, nonlinearity=f, pad='same',
                           W=HeNormal(gain='relu'))
        if dropoutrate > 0:  # with dropout
            dropout = DropoutLayer(conv_1, p=dropoutrate)
            # contains the last weight portion, step 6
            conv_2 = ConvLayer(dropout, num_filters=filters,
                               filter_size=(3, 3), stride=(1, 1),
                               nonlinearity=f, pad='same',
                               W=HeNormal(gain='relu'))
        else:  # without dropout
            conv_2 = ConvLayer(conv_1, num_filters=filters,
                               filter_size=(3, 3), stride=(1, 1),
                               nonlinearity=f, pad='same',
                               W=HeNormal(gain='relu'))
        stack_3 = BatchNormLayer(conv_2)

        # add shortcut connections
        if increase_dim:
            # projection shortcut, as option B in paper
            projection = ConvLayer(l, num_filters=filters,
                                   filter_size=(1, 1), stride=(2, 2),
                                   nonlinearity=None, pad='same', b=None)
            block = ElemwiseSumLayer([stack_3, projection])
        elif first:
            # projection shortcut, as option B in paper
            projection = ConvLayer(l, num_filters=filters,
                                   filter_size=(1, 1), stride=(1, 1),
                                   nonlinearity=None, pad='same', b=None)
            block = ElemwiseSumLayer([stack_3, projection])
        else:
            block = ElemwiseSumLayer([stack_3, l])
        return block

    # Building the network
    l_in = InputLayer(shape=(None, 3, 32, 32), input_var=input_var)

    # we're normalizing the input as the net sees fit, and we normalize
    # the output
    l = batch_norm(
        ConvLayer(l_in, num_filters=n_filters[0], filter_size=(3, 3),
                  stride=(1, 1), nonlinearity=f, pad='same',
                  W=HeNormal(gain='relu')))
    l = BatchNormLayer(l)

    # first stack of residual blocks
    l = residual_block(l, first=True, filters=n_filters[1])
    for _ in range(1, n):
        l = residual_block(l, filters=n_filters[1])

    # second stack of residual blocks
    l = residual_block(l, increase_dim=True, filters=n_filters[2])
    for _ in range(1, n):
        l = residual_block(l, filters=n_filters[2])

    # third stack of residual blocks
    l = residual_block(l, increase_dim=True, filters=n_filters[3])
    for _ in range(1, n):
        l = residual_block(l, filters=n_filters[3])

    bn_post_conv = BatchNormLayer(l)
    bn_post_relu = NonlinearityLayer(bn_post_conv, f)

    # average pooling
    avg_pool = GlobalPoolLayer(bn_post_relu)

    # fully connected layer
    network = DenseLayer(avg_pool, num_units=nout, W=HeNormal(),
                         nonlinearity=lasagne.nonlinearities.identity)

    conf_layer_1 = DenseLayer(
        ElemwiseSumLayer([
            DenseLayer(bn_post_relu, num_units=256,
                       nonlinearity=lasagne.nonlinearities.identity),
            DenseLayer(network, num_units=256,
                       nonlinearity=lasagne.nonlinearities.identity)
        ]),
        num_units=128, nonlinearity=f)
    conf = DenseLayer(conf_layer_1, num_units=1,
                      nonlinearity=lasagne.nonlinearities.sigmoid)

    network = NonlinearityLayer(network,
                                nonlinearity=lasagne.nonlinearities.softmax)
    return network, conf
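# Side check for the tanh-based GELU used above (numpy/scipy only, separate
# from the Theano graph): the approximation tracks the exact erf form
# closely over a typical activation range, so swapping it in for ReLU-style
# nonlinearities is numerically benign.
import numpy as np
from scipy.special import erf

xs = np.linspace(-4.0, 4.0, 101)
approx = 0.5 * xs * (1 + np.tanh(np.sqrt(2 / np.pi) *
                                 (xs + 0.044715 * xs ** 3)))
exact = 0.5 * xs * (1 + erf(xs / np.sqrt(2)))
print(np.max(np.abs(approx - exact)))  # small, on the order of 1e-3 or below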
def fusion_train(x_data_train, location_data_train, resolution_data_train,
                 volume_data_train, x_data_val, location_data_val,
                 resolution_data_val, volume_data_val, fixed_size,
                 n_epoches=2, lr=0.001):
    loss_epoches = []
    snapshot_root = 'fusion_snapshot'
    n_train_samples = len(x_data_train)
    n_val_samples = len(x_data_val)

    # all adapters
    adapters = []
    adapters.append(adapter1((48, 48), 'fusion/fcn1/96.npz'))
    adapters.append(adapter2((48, 48), 'fusion/fcn2/92.npz'))
    adapters.append(adapter3((48, 48), 'fusion/fcn3/52.npz'))
    adapters.append(adapter4((48, 48), 'fusion/fcn4/470.npz'))
    adapters.append(adapter5((48, 48), 'fusion/fcn5/280.npz'))
    adapters.append(adapter6((48, 48), 'fusion/fcn6/114.npz'))

    # input tensors
    pred = T.tensor4('pred')
    location = T.vector('location')
    resolution = T.matrix('resolution')
    target_volume = T.fscalar('volume')

    # fusion layers
    l_in = InputLayer(shape=(None, len(adapters), fixed_size[0],
                             fixed_size[1]), input_var=pred)
    mid = Conv2DLayer(l_in, num_filters=1, filter_size=(1, 1), W=HeNormal())
    l_out = GlobalPoolLayer(mid)

    # area, 1d vector
    train_area = lasagne.layers.get_output(l_out,
                                           deterministic=True).flatten()
    val_area = lasagne.layers.get_output(l_out, deterministic=True).flatten()

    # predicted volume, 0d scalar
    train_pred_volume = utee.build_volume2(train_area, location, resolution,
                                           fixed_size)
    val_pred_volume = utee.build_volume2(val_area, location, resolution,
                                         fixed_size)

    # loss, 0d scalar
    train_loss = T.abs_(train_pred_volume - target_volume).mean() / 600.
    val_loss = T.abs_(val_pred_volume - target_volume).mean() / 600.

    # params
    params = lasagne.layers.get_all_params(l_out, trainable=True)
    fusion_snapshot_path = 'fusion_snapshot/0.npz'
    if os.path.exists(fusion_snapshot_path):
        with np.load(fusion_snapshot_path) as f:
            param_values = [f['arr_{}'.format(i)]
                            for i in range(len(f.files))]
        print('resuming snapshot from {}'.format(fusion_snapshot_path))
        param_cur = lasagne.layers.get_all_params(l_out)
        assert len(param_cur) == len(param_values)
        for p, v in zip(param_cur, param_values):
            p.set_value(v)
    else:
        print("snapshot {} not found".format(fusion_snapshot_path))
        # params[0].set_value(np.ones((1, 6, 1, 1), dtype='float32') / 6.)
        # print("snapshot to {}".format("0.npz"))
        # np.savez("0.npz", *lasagne.layers.get_all_param_values(l_out))

    print(params[0].get_value(), params[0].get_value().shape)
    updates = lasagne.updates.nesterov_momentum(train_loss, params,
                                                learning_rate=lr,
                                                momentum=0.9)
    train_fn = theano.function([pred, location, resolution, target_volume],
                               train_loss, updates=updates)
    val_fn = theano.function([pred, location, resolution, target_volume],
                             val_loss)
    test_fn = theano.function([pred, location, resolution],
                              [val_area, val_pred_volume])
    area_fn = theano.function([pred], val_area)

    print("Training and validating procedure begins")
    for cur_epoch in range(n_epoches + 1):
        print("epoch {}/{} begin".format(cur_epoch, n_epoches))
        if cur_epoch > 0:
            print(".training, {} samples to go".format(n_train_samples))
            losses_data_train = []
            for j in range(n_train_samples):
                x_e = x_data_train[j].astype('float32')
                preds = []
                for adapter in adapters:
                    preds.append(adapter.convert(x_e))
                pred_e = np.concatenate(preds, axis=1)
                location_e = location_data_train[j].astype('float32')
                resolution_e = resolution_data_train[j].astype('float32')
                volume_e = volume_data_train[j].astype('float32')
                loss_data_train = train_fn(pred_e, location_e, resolution_e,
                                           volume_e)
                losses_data_train.append(loss_data_train)
                print(".training loss: {}".format(np.mean(loss_data_train)))
                if np.isnan(np.mean(loss_data_train)):
                    print(".training detected nan, break and stop")
                    break

        print(".validating, {} samples to go".format(n_val_samples))
        losses_data_val = []
        for i in range(n_val_samples):
            volume_min_e = volume_data_val[2 * i].astype('float32')
            volume_max_e = volume_data_val[2 * i + 1].astype('float32')
            pred_volumes_data = []
            for j in range(len(x_data_val[i])):
                x_e = x_data_val[i][j].astype('float32')
                preds = []
                for adapter in adapters:
                    preds.append(adapter.convert(x_e))
                pred_e = np.concatenate(preds, axis=1)
                location_e = location_data_val[i][j].astype('float32')
                resolution_e = resolution_data_val[i][j].astype('float32')
                _, pred_volume_data = test_fn(pred_e, location_e,
                                              resolution_e)
                if np.isnan(pred_volume_data):
                    print(x_e, pred_e, location_e, resolution_e)
                pred_volumes_data.append(pred_volume_data)
            volume_min_pred = np.min(pred_volumes_data)
            volume_max_pred = np.max(pred_volumes_data)
            loss_min_data = np.abs(volume_min_pred - volume_min_e) / 600
            loss_max_data = np.abs(volume_max_pred - volume_max_e) / 600
            loss_data_val = (loss_min_data + loss_max_data) / 2.0
            losses_data_val.append(loss_data_val)
        print(".validating loss: {}".format(np.mean(losses_data_val)))
        loss_epoches.append(np.mean(losses_data_val))
        if np.isnan(np.mean(losses_data_val)):
            print(".validating detected nan, break and stop")
            break
        cur_snapshot_path = os.path.join(snapshot_root,
                                         str(cur_epoch) + '.npz')
        print("snapshot to {}".format(cur_snapshot_path))
        np.savez(cur_snapshot_path,
                 *lasagne.layers.get_all_param_values(l_out))

    print("Training done!")
    idx = np.argmin(loss_epoches)
    with open('./SETTINGS.json', 'r') as f:
        data = json.load(f)
    best_snapshot_path = os.path.join(snapshot_root, str(idx) + '.npz')
    print("add info {} to SETTINGS.json".format(best_snapshot_path))
    data['FUSION_SNAPSHOT_PATH'] = os.path.join(best_snapshot_path)
    with open('./SETTINGS.json', 'w') as f:
        json.dump(data, f)
def ResNet_FullPreActivation(input_var=None, n=18):
    '''
    Adapted from https://github.com/Lasagne/Recipes/tree/master/papers/deep_residual_learning.
    Tweaked to be consistent with 'Identity Mappings in Deep Residual
    Networks', Kaiming He et al. 2016 (https://arxiv.org/abs/1603.05027)

    Formula to figure out depth: 6n + 2
    '''

    # create a residual learning building block with two stacked 3x3
    # conv layers as in the paper
    def residual_block(l, increase_dim=False, projection=True, first=False):
        input_num_filters = l.output_shape[1]
        if increase_dim:
            first_stride = (2, 2)
            out_num_filters = input_num_filters * 2
        else:
            first_stride = (1, 1)
            out_num_filters = input_num_filters

        if first:
            # hacky solution to keep layers correct
            bn_pre_relu = l
        else:
            # contains the BN -> ReLU portion, steps 1 to 2
            bn_pre_conv = BatchNormLayer(l)
            bn_pre_relu = NonlinearityLayer(bn_pre_conv, rectify)

        # contains the weight -> BN -> ReLU portion, steps 3 to 5
        conv_1 = batch_norm(
            ConvLayer(bn_pre_relu, num_filters=out_num_filters,
                      filter_size=(3, 3), stride=first_stride,
                      nonlinearity=rectify, pad='same', W=he_norm))

        # contains the last weight portion, step 6
        conv_2 = ConvLayer(conv_1, num_filters=out_num_filters,
                           filter_size=(3, 3), stride=(1, 1),
                           nonlinearity=None, pad='same', W=he_norm)

        # add shortcut connections
        if increase_dim:
            # projection shortcut, as option B in paper
            projection = ConvLayer(l, num_filters=out_num_filters,
                                   filter_size=(1, 1), stride=(2, 2),
                                   nonlinearity=None, pad='same', b=None)
            block = ElemwiseSumLayer([conv_2, projection])
        else:
            block = ElemwiseSumLayer([conv_2, l])
        return block

    # Building the network
    l_in = InputLayer(shape=(None, 3, PIXELS, PIXELS), input_var=input_var)

    # first layer, output is 16 x 32 x 32
    l = batch_norm(
        ConvLayer(l_in, num_filters=16, filter_size=(3, 3), stride=(1, 1),
                  nonlinearity=rectify, pad='same', W=he_norm))

    # first stack of residual blocks, output is 16 x 32 x 32
    l = residual_block(l, first=True)
    for _ in range(1, n):
        l = residual_block(l)

    # second stack of residual blocks, output is 32 x 16 x 16
    l = residual_block(l, increase_dim=True)
    for _ in range(1, n):
        l = residual_block(l)

    # third stack of residual blocks, output is 64 x 8 x 8
    l = residual_block(l, increase_dim=True)
    for _ in range(1, n):
        l = residual_block(l)

    bn_post_conv = BatchNormLayer(l)
    bn_post_relu = NonlinearityLayer(bn_post_conv, rectify)

    # average pooling
    avg_pool = GlobalPoolLayer(bn_post_relu)

    # fully connected layer
    network = DenseLayer(avg_pool, num_units=num_classes, W=HeNormal(),
                         nonlinearity=softmax)
    return network
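# A hypothetical usage sketch for the function above, assuming the
# module-level names it relies on (he_norm, PIXELS, num_classes) are defined
# and the usual Lasagne imports are in place.
import theano
import theano.tensor as T
import lasagne

input_var = T.tensor4('inputs')
network = ResNet_FullPreActivation(input_var, n=18)  # depth = 6*18 + 2 = 110
probs = lasagne.layers.get_output(network, deterministic=True)
predict_fn = theano.function([input_var], probs)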
def build_GoogLeNet(width, height):
    # Download pretrained weights from:
    # https://s3.amazonaws.com/lasagne/recipes/pretrained/imagenet/blvc_googlenet.pkl
    net = {}
    net['input'] = InputLayer((None, 3, width, height))
    net['conv1/7x7_s2'] = ConvLayer(net['input'], 64, 7, stride=2, pad=3,
                                    nonlinearity=elu, flip_filters=False)
    net['pool1/3x3_s2'] = PoolLayer(net['conv1/7x7_s2'], pool_size=3,
                                    stride=2)
    net['pool1/norm1'] = LRNLayer(net['pool1/3x3_s2'], alpha=0.00002, k=1)
    net['conv2/3x3_reduce'] = DropoutLayer(
        ConvLayer(net['pool1/norm1'], 64, 1, nonlinearity=elu,
                  flip_filters=False), p=0.2)
    net['conv2/3x3'] = ConvLayer(net['conv2/3x3_reduce'], 192, 3, pad=1,
                                 nonlinearity=elu, flip_filters=False)
    net['conv2/norm2'] = LRNLayer(net['conv2/3x3'], alpha=0.00002, k=1)
    net['pool2/3x3_s2'] = PoolLayer(net['conv2/norm2'], pool_size=3,
                                    stride=2)
    net.update(build_inception_module('inception_3a', net['pool2/3x3_s2'],
                                      [32, 64, 96, 128, 16, 32]))
    net.update(build_inception_module('inception_3b',
                                      net['inception_3a/output'],
                                      [64, 128, 128, 192, 32, 96]))
    net['pool3/3x3_s2'] = PoolLayer(net['inception_3b/output'], pool_size=3,
                                    stride=2)
    net.update(build_inception_module('inception_4a', net['pool3/3x3_s2'],
                                      [64, 192, 96, 208, 16, 48]))
    net.update(build_inception_module('inception_4b',
                                      net['inception_4a/output'],
                                      [64, 160, 112, 224, 24, 64]))
    net.update(build_inception_module('inception_4c',
                                      net['inception_4b/output'],
                                      [64, 128, 128, 256, 24, 64]))
    net.update(build_inception_module('inception_4d',
                                      net['inception_4c/output'],
                                      [64, 112, 144, 288, 32, 64]))
    net.update(build_inception_module('inception_4e',
                                      net['inception_4d/output'],
                                      [128, 256, 160, 320, 32, 128]))
    net['pool4/3x3_s2'] = PoolLayer(net['inception_4e/output'], pool_size=3,
                                    stride=2)
    net.update(build_inception_module('inception_5a', net['pool4/3x3_s2'],
                                      [128, 256, 160, 320, 32, 128]))
    net.update(build_inception_module('inception_5b',
                                      net['inception_5a/output'],
                                      [128, 384, 192, 384, 48, 128]))
    net['pool5/7x7_s1'] = GlobalPoolLayer(net['inception_5b/output'])
    net['loss3/classifier'] = DenseLayer(net['pool5/7x7_s1'], num_units=2,
                                         nonlinearity=linear)
    net['softmax'] = NonlinearityLayer(net['loss3/classifier'],
                                       nonlinearity=softmax)
    return net
def wide_resnet(l_in, d, k):
    """Build a Wide-ResNet WRN-d-k.

    Parameters
    ----------
    :param l_in: input layer
    :param d: network depth (d follows the relation d = 6 * n + 4, where n
        is the number of blocks per group)
    :param k: widening factor
    """
    if (d - 4) % 6 != 0:
        raise ValueError("d should be of the form d = 6 * n + 4")
    n = (d - 4) // 6
    he_norm = lasagne.init.HeNormal(gain='relu')

    def basic_block(incoming, num_filters, stride, shortcut, name=None):
        name = name + "_" if name is not None else ""
        conv_path = BatchNormLayer(incoming)
        conv_path = NonlinearityLayer(conv_path, nonlinearity=rectify)
        rectified_input = conv_path  # reused in linear shortcut

        conv_path = Conv2DLayer(conv_path, num_filters=num_filters,
                                filter_size=(3, 3), stride=stride,
                                pad='same', W=he_norm, b=None,
                                nonlinearity=None, name=name + "conv1")
        conv_path = BatchNormLayer(conv_path)
        conv_path = NonlinearityLayer(conv_path, nonlinearity=rectify)
        conv_path = Conv2DLayer(conv_path, num_filters=num_filters,
                                filter_size=(3, 3), pad='same', W=he_norm,
                                b=None, nonlinearity=None,
                                name=name + "conv2")

        if shortcut == 'identity':
            assert stride == (1, 1) or stride == 1
            short_path = incoming
        elif shortcut == 'linear':
            short_path = Conv2DLayer(rectified_input,
                                     num_filters=num_filters,
                                     filter_size=(1, 1), stride=stride,
                                     pad='same', W=he_norm, b=None,
                                     nonlinearity=None)
        else:
            raise ValueError("invalid parameter value for shortcut")
        o = ElemwiseSumLayer([conv_path, short_path], name=name + "sum")
        return o

    net = Conv2DLayer(l_in, num_filters=16, filter_size=(3, 3), pad='same',
                      W=he_norm, b=None, nonlinearity=None)

    net = basic_block(net, 16 * k, stride=(1, 1), shortcut='linear',
                      name="block11")
    for i in range(1, n):
        net = basic_block(net, 16 * k, stride=(1, 1), shortcut='identity',
                          name="block1" + str(i + 1))

    net = basic_block(net, 32 * k, stride=(2, 2), shortcut='linear',
                      name="block21")
    for i in range(1, n):
        net = basic_block(net, 32 * k, stride=(1, 1), shortcut='identity',
                          name="block2" + str(i + 1))

    net = basic_block(net, 64 * k, stride=(2, 2), shortcut='linear',
                      name="block31")
    for i in range(1, n):
        net = basic_block(net, 64 * k, stride=(1, 1), shortcut='identity',
                          name="block3" + str(i + 1))

    net = BatchNormLayer(net)
    net = NonlinearityLayer(net, nonlinearity=rectify)
    net = GlobalPoolLayer(net, T.mean, name="MeanPool")
    return net
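# Sketch: instantiating the function above as a WRN-28-10 on CIFAR-sized
# inputs. wide_resnet returns the globally pooled features, so the
# classification head is left to the caller; the 10-class softmax below is
# an assumption, not part of the original snippet.
import theano.tensor as T
import lasagne
from lasagne.layers import InputLayer, DenseLayer

l_in = InputLayer(shape=(None, 3, 32, 32), input_var=T.tensor4('x'))
features = wide_resnet(l_in, d=28, k=10)  # (28 - 4) // 6 = 4 blocks per group
model = DenseLayer(features, num_units=10,
                   nonlinearity=lasagne.nonlinearities.softmax)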
def __init__(self, numpy_rng, theano_rng=None, cfg=None, testing=False,
             input=None):
    self.layers = []
    self.params = []
    self.network = None
    self.cfg = cfg
    self.conv_layer_configs = cfg.conv_layer_configs
    self.conv_activation = cfg.conv_activation
    self.use_fast = cfg.use_fast

    if not theano_rng:
        theano_rng = RandomStreams(numpy_rng.randint(2**30))

    # allocate symbolic variables for the data
    if input is None:
        self.x = T.matrix('x')
        self.input = T.tensor4('inputs')
    else:
        self.x = input
    self.y = T.ivector('y')

    self.conv_layer_num = len(self.conv_layer_configs)
    config = self.conv_layer_configs[0]
    d1 = config['input_shape'][1]
    d2 = config['input_shape'][2]
    d3 = config['input_shape'][3]
    self.input = self.x.reshape((-1, d1, d2, d3))
    # print("[Debug] input_shape: ", config['input_shape'])
    print("[Debug] d1,d2,d3: ", d1, d2, d3)

    num_blocks = 3
    depth = 40
    growth_rate = 12
    dropout = 0

    self.network = lasagne.layers.InputLayer(shape=(None, d1, d2, d3),
                                             input_var=self.input)
    self.network = lasagne.layers.Conv2DLayer(
        self.network, num_filters=256, filter_size=3,
        W=lasagne.init.HeNormal(gain='relu'), b=None, nonlinearity=None,
        name='pre_conv')
    self.layers.append(layer_info('conv', (256, 3, 3, 3), 1))  # W
    # note: The authors' implementation does *not* have a dropout after the
    # initial convolution. This was missing in the paper, but important.
    # if dropout:
    #     network = DropoutLayer(network, dropout)

    # dense blocks with transitions in between
    n = (depth - 1) // num_blocks
    for b in range(num_blocks):
        nam = 'block' + str(b + 1)
        self.network = self.dense_block(self.network, n - 1, growth_rate,
                                        dropout, nam)
        if b < num_blocks - 1:
            nam = 'block' + str(b + 1) + '_trs'
            self.network = self.transition(self.network, dropout, nam)
        net_shape_tmp = lasagne.layers.get_output_shape(self.network)
        print("[Debug] net_shape: ", b, net_shape_tmp)

    # post processing until prediction
    self.network = BatchNormLayer(self.network, name='post_bn')
    self.layers.append(layer_info('bn', num_params=2))
    self.network = NonlinearityLayer(self.network, nonlinearity=rectify,
                                     name='post_relu')
    self.layers.append(layer_info('relu'))
    self.network = GlobalPoolLayer(self.network, name='post_pool')
    self.layers.append(layer_info('pool'))

    net_shape = lasagne.layers.get_output_shape(self.network)
    print("[Debug] before fc, net_shape: ", net_shape)
    self.conv_output_dim = net_shape[1]
    cfg.n_ins = net_shape[1]

    self.network = DenseLayer(self.network, cfg.n_outs, nonlinearity=softmax,
                              W=lasagne.init.HeNormal(gain=1), name='output')
    self.layers.append(layer_info('fc', (cfg.n_ins, cfg.n_outs), 2))

    # define the cost and error prediction
    prediction = lasagne.layers.get_output(self.network)
    self.finetune_cost = lasagne.objectives.categorical_crossentropy(
        prediction, self.y).mean()
    self.params = lasagne.layers.get_all_params(self.network, trainable=True)
def build_model(self, input_var, forward, dropout):
    net = dict()
    net['input'] = InputLayer((None, 3, None, None), input_var=input_var)
    net['conv'] = self.bn_conv(net['input'], num_filters=32, filter_size=3,
                               stride=2)
    net['conv_1'] = self.bn_conv(net['conv'], num_filters=32, filter_size=3)
    net['conv_2'] = self.bn_conv(net['conv_1'], num_filters=64,
                                 filter_size=3, pad=1)
    net['pool'] = Pool2DLayer(net['conv_2'], pool_size=3, stride=2,
                              mode='max')
    net['conv_3'] = self.bn_conv(net['pool'], num_filters=80, filter_size=1)
    net['conv_4'] = self.bn_conv(net['conv_3'], num_filters=192,
                                 filter_size=3)
    net['pool_1'] = Pool2DLayer(net['conv_4'], pool_size=3, stride=2,
                                mode='max')
    net['mixed/join'] = self.inceptionA(
        net['pool_1'], nfilt=((64,), (48, 64), (64, 96, 96), (32,)))
    net['mixed_1/join'] = self.inceptionA(
        net['mixed/join'], nfilt=((64,), (48, 64), (64, 96, 96), (64,)))
    net['mixed_2/join'] = self.inceptionA(
        net['mixed_1/join'], nfilt=((64,), (48, 64), (64, 96, 96), (64,)))
    net['mixed_3/join'] = self.inceptionB(
        net['mixed_2/join'], nfilt=((384,), (64, 96, 96)))
    net['mixed_4/join'] = self.inceptionC(
        net['mixed_3/join'],
        nfilt=((192,), (128, 128, 192), (128, 128, 128, 128, 192), (192,)))
    net['mixed_5/join'] = self.inceptionC(
        net['mixed_4/join'],
        nfilt=((192,), (160, 160, 192), (160, 160, 160, 160, 192), (192,)))
    net['mixed_6/join'] = self.inceptionC(
        net['mixed_5/join'],
        nfilt=((192,), (160, 160, 192), (160, 160, 160, 160, 192), (192,)))
    net['mixed_7/join'] = self.inceptionC(
        net['mixed_6/join'],
        nfilt=((192,), (192, 192, 192), (192, 192, 192, 192, 192), (192,)))
    # net['mixed_8/join'] = self.inceptionD(
    #     net['mixed_7/join'],
    #     nfilt=((192, 320), (192, 192, 192, 192)))
    # net['mixed_9/join'] = self.inceptionE(
    #     net['mixed_8/join'],
    #     nfilt=((320,), (384, 384, 384), (448, 384, 384, 384), (192,)),
    #     pool_mode='average_exc_pad')
    # net['mixed_10/join'] = self.inceptionE(
    #     net['mixed_9/join'],
    #     nfilt=((320,), (384, 384, 384), (448, 384, 384, 384), (192,)),
    #     pool_mode='max')
    net['pool3'] = GlobalPoolLayer(net['mixed_7/join'])
    net['prob'] = DenseLayer(net['pool3'], num_units=4, nonlinearity=softmax)
    return net
def build_model(input_var):
    # Three-layer residual block
    def residual_block3(l, base_dim, increase_dim=False, projection=False):
        if increase_dim:
            layer_1 = batch_norm(
                ConvLayer(l, num_filters=base_dim, filter_size=(1, 1),
                          stride=(2, 2), nonlinearity=None, pad='same',
                          W=nn.init.HeNormal(gain='relu')))
        else:
            layer_1 = batch_norm(
                ConvLayer(l, num_filters=base_dim, filter_size=(1, 1),
                          stride=(1, 1), nonlinearity=rectify, pad='same',
                          W=nn.init.HeNormal(gain='relu')))
        layer_2 = batch_norm(
            ConvLayer(layer_1, num_filters=base_dim, filter_size=(3, 3),
                      stride=(1, 1), nonlinearity=rectify, pad='same',
                      W=nn.init.HeNormal(gain='relu')))
        layer_3 = batch_norm(
            ConvLayer(layer_2, num_filters=4 * base_dim, filter_size=(1, 1),
                      stride=(1, 1), nonlinearity=rectify, pad='same',
                      W=nn.init.HeNormal(gain='relu')))

        # add shortcut connection
        if increase_dim:
            if projection:
                # projection shortcut (option B in paper)
                projection = batch_norm(
                    ConvLayer(l, num_filters=4 * base_dim,
                              filter_size=(1, 1), stride=(2, 2),
                              nonlinearity=None, pad='same', b=None))
                block = NonlinearityLayer(
                    ElemwiseSumLayer([layer_3, projection]),
                    nonlinearity=rectify)
            else:
                # identity shortcut (option A in paper)
                # we use a pooling layer to get identity with strides,
                # since identity layers with stride don't exist in Lasagne
                identity = PoolLayer(l, pool_size=1, stride=(2, 2),
                                     mode='average_exc_pad')
                padding = PadLayer(identity, [4 * base_dim, 0, 0],
                                   batch_ndim=1)
                block = NonlinearityLayer(
                    ElemwiseSumLayer([layer_3, padding]),
                    nonlinearity=rectify)
        else:
            block = NonlinearityLayer(ElemwiseSumLayer([layer_3, l]),
                                      nonlinearity=rectify)
        return block

    # Input of the network
    input_layer = InputLayer(shape=(batch_size, num_channels, input_height,
                                    input_width), input_var=input_var)

    # Very first conv layer
    l = batch_norm(
        ConvLayer(input_layer, num_filters=64, filter_size=(7, 7),
                  stride=(2, 2), nonlinearity=rectify, pad='same',
                  W=nn.init.HeNormal(gain='relu')))

    # Maxpool layer
    l = MaxPoolLayer(l, pool_size=(3, 3), stride=(2, 2))

    # Convolve with a 1x1 filter to match the input dimension with the
    # upcoming residual block
    l = batch_norm(
        ConvLayer(l, num_filters=256, filter_size=(1, 1), stride=(1, 1),
                  nonlinearity=rectify, pad='same',
                  W=nn.init.HeNormal(gain='relu')))

    ############# First residual blocks #############
    for _ in range(num_blocks[0] - 1):
        l = residual_block3(l, base_dim=64)

    ############# Second residual blocks ############
    # Increment dimension
    l = residual_block3(l, base_dim=128, increase_dim=True, projection=True)
    for _ in range(num_blocks[1] - 1):
        l = residual_block3(l, base_dim=128)

    ############# Third residual blocks #############
    # Increment dimension
    l = residual_block3(l, base_dim=256, increase_dim=True, projection=True)
    for _ in range(num_blocks[2] - 1):
        l = residual_block3(l, base_dim=256)

    ############# Fourth residual blocks #############
    # Increment dimension
    l = residual_block3(l, base_dim=512, increase_dim=True, projection=True)
    for _ in range(num_blocks[2] - 1):
        l = residual_block3(l, base_dim=512)

    # Global pooling layer
    l = GlobalPoolLayer(l)

    # Softmax layer
    softmax_layer = DenseLayer(l, num_units=output_dim, W=nn.init.HeNormal(),
                               nonlinearity=softmax)
    return softmax_layer
def build_cnn(cls_num, input_var=None, n=5, c=(3, 32, 32)):
    def residual_block(l, increase_dim=False, projection=False):
        input_num_filters = l.output_shape[1]
        if increase_dim:
            first_stride = (2, 2)
            out_num_filters = input_num_filters * 2
        else:
            first_stride = (1, 1)
            out_num_filters = input_num_filters

        stack_1 = batch_norm(
            ConvLayer(l, num_filters=out_num_filters, filter_size=(3, 3),
                      stride=first_stride, nonlinearity=rectify, pad='same',
                      W=lasagne.init.HeNormal(gain='relu'),
                      flip_filters=False))
        stack_2 = batch_norm(
            ConvLayer(stack_1, num_filters=out_num_filters,
                      filter_size=(3, 3), stride=(1, 1), nonlinearity=None,
                      pad='same', W=lasagne.init.HeNormal(gain='relu'),
                      flip_filters=False))

        # add shortcut connections
        if increase_dim:
            if projection:
                # projection shortcut, as option B in paper
                projection = batch_norm(
                    ConvLayer(l, num_filters=out_num_filters,
                              filter_size=(1, 1), stride=(2, 2),
                              nonlinearity=None, pad='same', b=None,
                              flip_filters=False))
                block = NonlinearityLayer(
                    ElemwiseSumLayer([stack_2, projection]),
                    nonlinearity=rectify)
            else:
                # identity shortcut, as option A in paper
                identity = ExpressionLayer(
                    l, lambda X: X[:, :, ::2, ::2],
                    lambda s: (s[0], s[1], s[2] // 2, s[3] // 2))
                padding = PadLayer(identity, [out_num_filters // 4, 0, 0],
                                   batch_ndim=1)
                block = NonlinearityLayer(
                    ElemwiseSumLayer([stack_2, padding]),
                    nonlinearity=rectify)
        else:
            block = NonlinearityLayer(ElemwiseSumLayer([stack_2, l]),
                                      nonlinearity=rectify)
        return block

    # Building the network
    l_in = InputLayer(shape=(None, c[0], c[1], c[2]), input_var=input_var)

    # first layer, output is 16 x 32 x 32
    l = batch_norm(
        ConvLayer(l_in, num_filters=16, filter_size=(3, 3), stride=(2, 2),
                  nonlinearity=rectify, pad='same',
                  W=lasagne.init.HeNormal(gain='relu'), flip_filters=False))

    # first stack of residual blocks, output is 16 x 32 x 32
    for _ in range(n):
        l = residual_block(l)

    # second stack of residual blocks, output is 32 x 16 x 16
    l = residual_block(l, increase_dim=True)
    for _ in range(n):
        l = residual_block(l)

    l = residual_block(l, increase_dim=True)
    l1 = l2 = l
    for _ in range(1, n):
        l1 = residual_block(l1)

    # third stack of residual blocks, output is 64 x 8 x 8
    l1 = residual_block(l1, increase_dim=True)
    for _ in range(1, n):
        l1 = residual_block(l1)

    # average pooling
    l1 = GlobalPoolLayer(l1)

    # fully connected layer
    cls_network = DenseLayer(l1, num_units=cls_num,
                             W=lasagne.init.HeNormal(), nonlinearity=softmax)

    ############### kl line ##################
    for _ in range(1, n):
        l2 = residual_block(l2)

    # third stack of residual blocks, output is 64 x 8 x 8
    l2 = residual_block(l2, increase_dim=True)
    for _ in range(1, n):
        l2 = residual_block(l2)

    # average pooling
    l2 = GlobalPoolLayer(l2)

    # fully connected layer
    kl_network = DenseLayer(l2, num_units=cls_num,
                            W=lasagne.init.HeNormal(), nonlinearity=softmax)

    return cls_network, kl_network
def build_model():
    net = {}
    net['input'] = InputLayer((None, 3, image_size, image_size))
    net['conv1/7x7_s2'] = ConvLayer(net['input'], 64, 7, stride=2, pad=3,
                                    flip_filters=False)
    net['pool1/3x3_s2'] = PoolLayer(net['conv1/7x7_s2'], pool_size=3,
                                    stride=2, ignore_border=False)
    net['pool1/norm1'] = LRNLayer(net['pool1/3x3_s2'], alpha=0.00002, k=1)
    net['conv2/3x3_reduce'] = ConvLayer(net['pool1/norm1'], 64, 1,
                                        flip_filters=False)
    net['conv2/3x3'] = ConvLayer(net['conv2/3x3_reduce'], 192, 3, pad=1,
                                 flip_filters=False)
    net['conv2/norm2'] = LRNLayer(net['conv2/3x3'], alpha=0.00002, k=1)
    net['pool2/3x3_s2'] = PoolLayer(net['conv2/norm2'], pool_size=3,
                                    stride=2)
    net.update(build_inception_module('inception_3a', net['pool2/3x3_s2'],
                                      [32, 64, 96, 128, 16, 32]))
    net.update(build_inception_module('inception_3b',
                                      net['inception_3a/output'],
                                      [64, 128, 128, 192, 32, 96]))
    net['pool3/3x3_s2'] = PoolLayer(net['inception_3b/output'], pool_size=3,
                                    stride=2)
    net.update(build_inception_module('inception_4a', net['pool3/3x3_s2'],
                                      [64, 192, 96, 208, 16, 48]))
    net.update(build_inception_module('inception_4b',
                                      net['inception_4a/output'],
                                      [64, 160, 112, 224, 24, 64]))
    net.update(build_inception_module('inception_4c',
                                      net['inception_4b/output'],
                                      [64, 128, 128, 256, 24, 64]))
    net.update(build_inception_module('inception_4d',
                                      net['inception_4c/output'],
                                      [64, 112, 144, 288, 32, 64]))
    net.update(build_inception_module('inception_4e',
                                      net['inception_4d/output'],
                                      [128, 256, 160, 320, 32, 128]))
    net['pool4/3x3_s2'] = PoolLayer(net['inception_4e/output'], pool_size=3,
                                    stride=2)
    net.update(build_inception_module('inception_5a', net['pool4/3x3_s2'],
                                      [128, 256, 160, 320, 32, 128]))
    net.update(build_inception_module('inception_5b',
                                      net['inception_5a/output'],
                                      [128, 384, 192, 384, 48, 128]))
    net['pool5/7x7_s1'] = GlobalPoolLayer(net['inception_5b/output'])

    # load the pretrained GoogLeNet weights for everything up to the
    # global pool, then attach a fresh 196-way classifier
    import pickle
    with open(root + 'models/blvc_googlenet.pkl', 'rb') as f:
        model = pickle.load(f)
    set_all_param_values(net['pool5/7x7_s1'], model['param values'][:114])

    net['loss3/classifier'] = DenseLayer(net['pool5/7x7_s1'], num_units=196,
                                         nonlinearity=linear)
    net['prob'] = NonlinearityLayer(net['loss3/classifier'],
                                    nonlinearity=softmax)

    model = NeuralNet(
        layers=net['prob'],
        # use_label_encoder=False,
        # objective_l2=1e-4,  # 1e-3
        # update=lasagne.updates.adam,
        # update_learning_rate=1e-4,
        update=lasagne.updates.nesterov_momentum,
        update_momentum=0.9,
        update_learning_rate=theano.shared(float32(0.03)),  # 1e-4
        train_split=TrainSplit(0.1, random_state=42, stratify=False),
        # batch_iterator_train=train_iterator,
        # batch_iterator_test=test_iterator,
        on_epoch_finished=[
            save_weights,
            save_training_history,
            plot_training_history,
            early_stopping,
            # StepDecay('update_learning_rate', start=1e-2, stop=1e-3)
        ],
        verbose=1,
        max_epochs=200,
        # custom_score=('CRPS', CRPS)
    )
    return model
def build_cnn(input_var=None):
    network = lasagne.layers.InputLayer(shape=(BATCHSIZE, 3, 256, 256),
                                        input_var=input_var)
    network = Conv2DLayer(network, num_filters=64, filter_size=7, stride=2,
                          pad=3)
    network = lasagne.layers.MaxPool2DLayer(network, pool_size=3, stride=2,
                                            ignore_border=False)
    network = LRNLayer(network, alpha=0.00002, k=1)
    network = lasagne.layers.NINLayer(network, num_units=64,
                                      W=lasagne.init.Orthogonal(1),
                                      b=lasagne.init.Constant(0))
    network = Conv2DLayer(network, 192, 3, pad=1)
    network = LRNLayer(network, alpha=0.00002, k=1)
    network = lasagne.layers.MaxPool2DLayer(network, pool_size=3, stride=2)
    network = inception_module(network, pool_filters=32, num_1x1=64,
                               reduce_3x3=96, num_3x3=128, reduce_5x5=16,
                               num_5x5=32)
    network = inception_module(network, pool_filters=64, num_1x1=128,
                               reduce_3x3=128, num_3x3=192, reduce_5x5=32,
                               num_5x5=96)
    network = lasagne.layers.MaxPool2DLayer(network, pool_size=3, stride=2)
    network = inception_module(network, pool_filters=64, num_1x1=192,
                               reduce_3x3=96, num_3x3=208, reduce_5x5=16,
                               num_5x5=48)
    network = inception_module(network, pool_filters=64, num_1x1=160,
                               reduce_3x3=112, num_3x3=224, reduce_5x5=24,
                               num_5x5=64)
    network = inception_module(network, pool_filters=64, num_1x1=128,
                               reduce_3x3=128, num_3x3=256, reduce_5x5=24,
                               num_5x5=64)
    network = inception_module(network, pool_filters=64, num_1x1=112,
                               reduce_3x3=144, num_3x3=288, reduce_5x5=32,
                               num_5x5=64)
    network = inception_module(network, pool_filters=128, num_1x1=256,
                               reduce_3x3=160, num_3x3=320, reduce_5x5=32,
                               num_5x5=128)
    network = lasagne.layers.MaxPool2DLayer(network, pool_size=3, stride=2)
    network = inception_module(network, pool_filters=128, num_1x1=256,
                               reduce_3x3=160, num_3x3=320, reduce_5x5=32,
                               num_5x5=128)
    network = inception_module(network, pool_filters=128, num_1x1=384,
                               reduce_3x3=192, num_3x3=384, reduce_5x5=48,
                               num_5x5=128)
    network = GlobalPoolLayer(network)
    network = lasagne.layers.DenseLayer(
        lasagne.layers.dropout(network, p=.4), num_units=447,
        nonlinearity=lasagne.nonlinearities.linear)
    network = lasagne.layers.NonlinearityLayer(
        network, nonlinearity=lasagne.nonlinearities.softmax)
    return network
def ResNet_FullPre_Wide(input_var=None, n=6, k=4):
    '''
    Adapted from https://github.com/Lasagne/Recipes/tree/master/papers/deep_residual_learning.
    Tweaked to be consistent with 'Identity Mappings in Deep Residual
    Networks', Kaiming He et al. 2016 (https://arxiv.org/abs/1603.05027)
    and 'Wide Residual Networks', Sergey Zagoruyko, Nikos Komodakis 2016
    (http://arxiv.org/pdf/1605.07146v1.pdf)

    Depth = 6n + 2
    '''
    n_filters = {0: 16, 1: 16 * k, 2: 32 * k, 3: 64 * k}

    # create a residual learning building block with two stacked 3x3
    # conv layers as in the paper
    def residual_block(l, increase_dim=False, projection=True, first=False,
                       filters=16):
        if increase_dim:
            first_stride = (2, 2)
        else:
            first_stride = (1, 1)

        if first:
            # hacky solution to keep layers correct
            bn_pre_relu = l
        else:
            # contains the BN -> ReLU portion, steps 1 to 2
            bn_pre_conv = BatchNormLayer(l)
            bn_pre_relu = NonlinearityLayer(bn_pre_conv, rectify)

        # contains the weight -> BN -> ReLU portion, steps 3 to 5
        conv_1 = batch_norm(
            ConvLayer(bn_pre_relu, num_filters=filters, filter_size=(3, 3),
                      stride=first_stride, nonlinearity=rectify, pad='same',
                      W=he_norm))
        # dropout = DropoutLayer(conv_1, p=0.3)

        # contains the last weight portion, step 6
        conv_2 = ConvLayer(conv_1, num_filters=filters, filter_size=(3, 3),
                           stride=(1, 1), nonlinearity=None, pad='same',
                           W=he_norm)

        # add shortcut connections
        if increase_dim:
            # projection shortcut, as option B in paper
            projection = ConvLayer(bn_pre_relu, num_filters=filters,
                                   filter_size=(1, 1), stride=(2, 2),
                                   nonlinearity=None, pad='same', b=None)
            block = ElemwiseSumLayer([conv_2, projection])
        elif first:
            # projection shortcut, as option B in paper
            projection = ConvLayer(l, num_filters=filters,
                                   filter_size=(1, 1), stride=(1, 1),
                                   nonlinearity=None, pad='same', b=None)
            block = ElemwiseSumLayer([conv_2, projection])
        else:
            block = ElemwiseSumLayer([conv_2, l])
        return block

    # Building the network
    l_in = InputLayer(shape=(None, 3, PIXELS, PIXELS), input_var=input_var)

    # first layer, output is 16 x 64 x 64
    l = batch_norm(
        ConvLayer(l_in, num_filters=n_filters[0], filter_size=(3, 3),
                  stride=(1, 1), nonlinearity=rectify, pad='same',
                  W=he_norm))

    # first stack of residual blocks, output is 32 x 64 x 64
    l = residual_block(l, first=True, filters=n_filters[1])
    for _ in range(1, n):
        l = residual_block(l, filters=n_filters[1])

    # second stack of residual blocks, output is 64 x 32 x 32
    l = residual_block(l, increase_dim=True, filters=n_filters[2])
    for _ in range(1, (n + 2)):
        l = residual_block(l, filters=n_filters[2])

    # third stack of residual blocks, output is 128 x 16 x 16
    l = residual_block(l, increase_dim=True, filters=n_filters[3])
    for _ in range(1, (n + 2)):
        l = residual_block(l, filters=n_filters[3])

    bn_post_conv = BatchNormLayer(l)
    bn_post_relu = NonlinearityLayer(bn_post_conv, rectify)

    # average pooling
    avg_pool = GlobalPoolLayer(bn_post_relu)

    # fully connected layer
    network = DenseLayer(avg_pool, num_units=10, W=HeNormal(),
                         nonlinearity=softmax)
    return network
def build_cnn(input_var=None, n=5):
    # create a residual learning building block with two stacked 3x3
    # conv layers as in the paper
    def residual_block(l, increase_dim=False, projection=False):
        input_num_filters = l.output_shape[1]
        if increase_dim:
            first_stride = (2, 2)
            out_num_filters = input_num_filters * 2
        else:
            first_stride = (1, 1)
            out_num_filters = input_num_filters

        # print(l.output_shape)
        l_l = DenseLayer(l, num_units=l.output_shape[3], num_leading_axes=-1,
                         nonlinearity=None)
        # print(l.output_shape[3])
        # print("l_1.output_shape", l_l.output_shape)
        # stride=first_stride
        stack_left_1 = batch_norm(
            ConvLayer(l_l, num_filters=out_num_filters, filter_size=(3, 3),
                      stride=first_stride, nonlinearity=rectify, pad='same',
                      W=lasagne.init.HeNormal(gain='relu'),
                      flip_filters=False))
        stack_left_2 = batch_norm(
            ConvLayer(stack_left_1, num_filters=out_num_filters,
                      filter_size=(3, 3), stride=(1, 1), nonlinearity=None,
                      pad='same', W=lasagne.init.HeNormal(gain='relu'),
                      flip_filters=False))
        # stack_right_1 = batch_norm(ConvLayer(
        #     ElemwiseSumLayer([l, NegativeLayer(l_l)]),
        #     num_filters=out_num_filters, filter_size=(2, 2),
        #     stride=first_stride, nonlinearity=rectify, pad='same',
        #     W=lasagne.init.HeNormal(gain='relu'), flip_filters=False))
        # stack_right_2 = batch_norm(ConvLayer(
        #     stack_right_1, num_filters=out_num_filters, filter_size=(2, 2),
        #     stride=(1, 1), nonlinearity=None, pad='same',
        #     W=lasagne.init.HeNormal(gain='relu'), flip_filters=False))
        print("first stack: ", stack_left_2.output_shape)

        # add shortcut connections
        if increase_dim:
            if projection:
                # projection shortcut, as option B in paper
                projection = batch_norm(
                    ConvLayer(l, num_filters=out_num_filters,
                              filter_size=(1, 1), stride=(2, 2),
                              nonlinearity=None, pad='same', b=None,
                              flip_filters=False))
                print("projection shape: ", projection.output_shape)
                # block = NonlinearityLayer(ElemwiseSumLayer(
                #     [stack_left_2, stack_right_2, projection]),
                #     nonlinearity=rectify)
                block = NonlinearityLayer(
                    ElemwiseSumLayer([stack_left_2, projection]),
                    nonlinearity=rectify)
            else:
                # identity shortcut, as option A in paper; the output-shape
                # lambda must round up whenever a spatial dim is odd
                # print(l.output_shape[2])
                if l.output_shape[2] % 2 == 0 and l.output_shape[3] % 2 == 0:
                    identity = ExpressionLayer(
                        l, lambda X: X[:, :, ::2, ::2],
                        lambda s: (s[0], s[1], s[2] // 2, s[3] // 2))
                elif (l.output_shape[2] % 2 == 0
                      and l.output_shape[3] % 2 == 1):
                    identity = ExpressionLayer(
                        l, lambda X: X[:, :, ::2, ::2],
                        lambda s: (s[0], s[1], s[2] // 2, s[3] // 2 + 1))
                elif (l.output_shape[2] % 2 == 1
                      and l.output_shape[3] % 2 == 0):
                    identity = ExpressionLayer(
                        l, lambda X: X[:, :, ::2, ::2],
                        lambda s: (s[0], s[1], s[2] // 2 + 1, s[3] // 2))
                else:
                    identity = ExpressionLayer(
                        l, lambda X: X[:, :, ::2, ::2],
                        lambda s: (s[0], s[1], s[2] // 2 + 1, s[3] // 2 + 1))
                padding = PadLayer(identity, [out_num_filters // 4, 0, 0],
                                   batch_ndim=1)
                print('------------------')
                print(stack_left_2.output_shape)
                # print(stack_right_2.output_shape)
                print(identity.output_shape)
                print(padding.output_shape)
                # block = NonlinearityLayer(ElemwiseSumLayer(
                #     [stack_left_2, stack_right_2, padding]),
                #     nonlinearity=rectify)
                block = NonlinearityLayer(
                    ElemwiseSumLayer([stack_left_2, padding]),
                    nonlinearity=rectify)
        else:
            # block = NonlinearityLayer(ElemwiseSumLayer(
            #     [stack_left_2, stack_right_2, l]), nonlinearity=rectify)
            print("l output shape: ", l.output_shape)
            block = NonlinearityLayer(ElemwiseSumLayer([stack_left_2, l]),
                                      nonlinearity=rectify)
        return block

    # Building the network
    l_in = InputLayer(shape=(None, 16, 512, 660), input_var=input_var)

    # first layer
    l = batch_norm(
        ConvLayer(l_in, num_filters=16, filter_size=(3, 3), stride=(4, 4),
                  nonlinearity=rectify, pad='same',
                  W=lasagne.init.HeNormal(gain='relu'), flip_filters=False))
    print(l.output_shape)

    # first stack of residual blocks
    for _ in range(n):
        l = residual_block(l)
    # print(l.output_shape)

    l = residual_block(l, increase_dim=True)
    for _ in range(n):
        l = residual_block(l)
    print(l.output_shape)

    l = residual_block(l, increase_dim=True)
    for _ in range(n):
        l = residual_block(l)
    print(l.output_shape)

    # second stack of residual blocks
    l = residual_block(l, increase_dim=True)
    for _ in range(n):
        l = residual_block(l)
    print(l.output_shape)

    """
    # third stack of residual blocks, output is 64 x 8 x 8
    l = residual_block(l, increase_dim=True)
    for _ in range(1, n):
        l = residual_block(l)
    """

    # average pooling
    l = GlobalPoolLayer(l)
    print("before dense: ", l.output_shape)

    # fully connected layer
    network = DenseLayer(l, num_units=17, W=lasagne.init.HeNormal(),
                         nonlinearity=sigmoid)
    return network
def build_model():
    net = {}
    net['input'] = InputLayer(shape=(None, ) + nn_input_shape)
    net['reshuffle'] = DimshuffleLayer(net['input'],
                                       pattern=(0, 'x', 1, 2, 3))
    net['conv'] = bc(net['reshuffle'], num_filters=32, filter_size=3,
                     stride=2)
    net['conv_1'] = bc(net['conv'], num_filters=32, filter_size=3)
    net['conv_2'] = bc(net['conv_1'], num_filters=64, filter_size=3, pad=1)
    net['pool'] = Pool3DLayer(net['conv_2'], pool_size=3, stride=2,
                              mode='max')
    # net['conv_3'] = bc(net['pool'], num_filters=80, filter_size=1)
    # net['conv_4'] = bc(net['conv_3'], num_filters=192, filter_size=3)
    # net['pool_1'] = Pool3DLayer(net['conv_4'], pool_size=3, stride=2,
    #                             mode='max')

    # I divided all the filter counts by 2
    net['mixed/join'] = inceptionA(
        net['pool'], nfilt=((32,), (24, 32), (32, 48, 48), (16,)))
    net['mixed_1/join'] = inceptionA(
        net['mixed/join'], nfilt=((32,), (24, 32), (32, 48, 48), (32,)))
    net['mixed_2/join'] = inceptionA(
        net['mixed_1/join'], nfilt=((32,), (24, 32), (32, 48, 48), (32,)))

    # I divided all the filter counts by 2
    net['mixed_3/join'] = inceptionB(net['mixed_2/join'],
                                     nfilt=((192,), (32, 48, 48)))

    # I divided all the filter counts by 4
    net['mixed_4/join'] = inceptionC(
        net['mixed_3/join'],
        nfilt=((48,), (32, 32, 32, 48), (32, 32, 32, 32, 32, 32, 48), (48,)))
    net['mixed_5/join'] = inceptionC(
        net['mixed_4/join'],
        nfilt=((48,), (40, 40, 40, 48), (40, 40, 40, 40, 40, 40, 48), (48,)))
    net['mixed_6/join'] = inceptionC(
        net['mixed_5/join'],
        nfilt=((48,), (40, 40, 40, 48), (40, 40, 40, 40, 40, 40, 48), (48,)))
    net['mixed_7/join'] = inceptionC(
        net['mixed_6/join'],
        nfilt=((48,), (48, 48, 40, 48), (48, 48, 48, 48, 48, 48, 48), (48,)))
    net['mixed_8/join'] = inceptionD(
        net['mixed_7/join'], nfilt=((48, 80), (48, 48, 48, 48, 48)))
    net['mixed_9/join'] = inceptionE(
        net['mixed_8/join'],
        nfilt=((80,), (96, 96, 96, 96), (112, 96, 96, 96, 96), (48,)),
        pool_mode='average_exc_pad')
    net['mixed_10/join'] = inceptionE(
        net['mixed_9/join'],
        nfilt=((80,), (96, 96, 96, 96), (112, 96, 96, 96, 96), (48,)),
        pool_mode='max')

    net['pool3'] = GlobalPoolLayer(net['mixed_10/join'])
    net['sigmoid'] = DenseLayer(net['pool3'], num_units=1,
                                W=lasagne.init.Constant(0.0), b=None,
                                nonlinearity=lasagne.nonlinearities.sigmoid)
    net['output'] = reshape(net['sigmoid'], shape=(-1, ))

    return {
        "inputs": {
            "bcolzall:3d": net['input'],
        },
        "outputs": {
            "predicted_probability": net['output'],
        },
    }
def __init__(self, n_in, inception_layers, n_out, pool_sizes=None,
             n_hidden=512, trans_func=rectify, out_func=softmax,
             output_dropout=0.0, stats=0, batch_norm=False,
             inception_dropout=0.0):
    super(Incep, self).__init__(n_in, n_hidden, n_out, trans_func)
    self.outf = out_func
    self.log = ""

    # Overwrite input layer
    sequence_length, n_features = n_in
    self.l_in = InputLayer(shape=(None, sequence_length + stats, n_features),
                           name='Input')
    l_prev = self.l_in

    # Separate into raw values and statistics
    if stats > 0:
        stats_layer = SliceLayer(l_prev,
                                 indices=slice(sequence_length, None),
                                 axis=1)
        stats_layer = ReshapeLayer(stats_layer, (-1, stats * n_features))
        l_prev = SliceLayer(l_prev, indices=slice(0, sequence_length),
                            axis=1)

    # 2D convolutional layers --------------
    l_prev = ReshapeLayer(l_prev, (-1, 1, sequence_length, n_features),
                          name='Reshape')
    l_prev = DimshuffleLayer(l_prev, (0, 3, 2, 1), name='Dimshuffle')

    # Init with a conv
    self.log += "\nAdding 2D conv layer: %d x %d" % (32, 3)
    l_prev = Conv2DLayer(l_prev, num_filters=32, filter_size=(3, 1),
                         pad='same', nonlinearity=None, b=None,
                         name='Input Conv2D')
    l_prev = BatchNormalizeLayer(l_prev, normalize=batch_norm,
                                 nonlinearity=self.transf)
    l_prev = Pool2DLayer(l_prev, pool_size=(2, 1), name='Input pool')
    l_prev = TiedDropoutLayer(l_prev, p=inception_dropout,
                              name='Input conv dropout')

    # Inception layers
    for inception_layer, pool_size in zip(inception_layers, pool_sizes):
        num_1x1, num_2x1_proj, reduce_3x1, num_3x1, reduce_5x1, num_5x1 = \
            inception_layer
        self.log += "\nAdding inception layer: %s" % str(inception_layer)
        l_prev = inception_module(l_prev, num_1x1, num_2x1_proj, reduce_3x1,
                                  num_3x1, reduce_5x1, num_5x1,
                                  batch_norm=batch_norm)
        if pool_size > 1:
            self.log += "\nAdding max pooling layer: %d" % pool_size
            l_prev = Pool2DLayer(l_prev, pool_size=(pool_size, 1),
                                 name='Inception pool')
        self.log += "\nAdding dropout layer: %.2f" % inception_dropout
        l_prev = TiedDropoutLayer(l_prev, p=inception_dropout,
                                  name='Inception dropout')
        print("Inception out shape", l_prev.output_shape)

    # Global pooling layer
    self.log += "\nGlobal Pooling: average"
    l_prev = GlobalPoolLayer(l_prev, pool_function=T.mean,
                             name='Global average pool')

    # Append statistics
    if stats > 0:
        l_prev = ConcatLayer((l_prev, stats_layer), axis=1)

    if n_hidden:
        self.log += "\nAdding dense layer with %d units" % n_hidden
        print("Dense input shape", l_prev.output_shape)
        l_prev = DenseLayer(l_prev, num_units=n_hidden,
                            nonlinearity=self.transf, name='Dense')
        if batch_norm:
            l_prev = BatchNormLayer(l_prev)
    if output_dropout:
        self.log += "\nAdding output dropout with probability %.2f" \
            % output_dropout
        l_prev = DropoutLayer(l_prev, p=output_dropout, name='Dense dropout')

    self.model = DenseLayer(l_prev, num_units=n_out, nonlinearity=out_func,
                            name='Output')
    self.model_params = get_all_params(self.model)
    self.sym_x = T.tensor3('x')
    self.sym_t = T.matrix('t')
def build_network():
    conv_defs = {
        'W': lasagne.init.HeNormal('relu'),
        'b': lasagne.init.Constant(0.0),
        'filter_size': (3, 3),
        'stride': (1, 1),
        'nonlinearity': lasagne.nonlinearities.LeakyRectify(0.1)
    }
    nin_defs = {
        'W': lasagne.init.HeNormal('relu'),
        'b': lasagne.init.Constant(0.0),
        'nonlinearity': lasagne.nonlinearities.LeakyRectify(0.1)
    }
    dense_defs = {
        'W': lasagne.init.HeNormal(1.0),
        'b': lasagne.init.Constant(0.0),
        'nonlinearity': lasagne.nonlinearities.softmax
    }
    wn_defs = {'momentum': .999}

    net = InputLayer(name='input', shape=(None, 3, 32, 32))
    net = GaussianNoiseLayer(net, name='noise', sigma=.15)
    net = WN(Conv2DLayer(net, name='conv1a', num_filters=128, pad='same', **conv_defs), **wn_defs)
    net = WN(Conv2DLayer(net, name='conv1b', num_filters=128, pad='same', **conv_defs), **wn_defs)
    net = WN(Conv2DLayer(net, name='conv1c', num_filters=128, pad='same', **conv_defs), **wn_defs)
    net = MaxPool2DLayer(net, name='pool1', pool_size=(2, 2))
    net = DropoutLayer(net, name='drop1', p=.5)
    net = WN(Conv2DLayer(net, name='conv2a', num_filters=256, pad='same', **conv_defs), **wn_defs)
    net = WN(Conv2DLayer(net, name='conv2b', num_filters=256, pad='same', **conv_defs), **wn_defs)
    net = WN(Conv2DLayer(net, name='conv2c', num_filters=256, pad='same', **conv_defs), **wn_defs)
    net = MaxPool2DLayer(net, name='pool2', pool_size=(2, 2))
    net = DropoutLayer(net, name='drop2', p=.5)
    net = WN(Conv2DLayer(net, name='conv3a', num_filters=512, pad=0, **conv_defs), **wn_defs)
    net = WN(NINLayer(net, name='conv3b', num_units=256, **nin_defs), **wn_defs)
    net = WN(NINLayer(net, name='conv3c', num_units=128, **nin_defs), **wn_defs)
    net = GlobalPoolLayer(net, name='pool3')
    net = WN(DenseLayer(net, name='dense', num_units=10, **dense_defs), **wn_defs)

    return net
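# A minimal sketch of compiling this 10-class network for supervised training
# with Theano; the symbolic variables, optimizer, and learning rate below are
# assumptions, not part of the original:
import theano
import theano.tensor as T
import lasagne

X = T.tensor4('X')
t = T.ivector('t')

net = build_network()
train_out = lasagne.layers.get_output(net, X)
loss = lasagne.objectives.categorical_crossentropy(train_out, t).mean()
params = lasagne.layers.get_all_params(net, trainable=True)
updates = lasagne.updates.adam(loss, params, learning_rate=3e-4)
train_fn = theano.function([X, t], loss, updates=updates)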
l_in = InputLayer(shape=(None, 1, image_size, image_size), input_var=X)
l = batch_norm(ConvLayer(l_in, num_filters=n_filters[0], filter_size=(3, 3),
                         stride=(1, 1), nonlinearity=rectify, pad='same',
                         W=HeNormal(gain='relu')))

l = residual_block(l, first=True, filters=n_filters[1])
for _ in range(1, wrn_n):
    l = residual_block(l, filters=n_filters[1])

l = residual_block(l, increase_dim=True, filters=n_filters[2])
for _ in range(1, (wrn_n + 2)):
    l = residual_block(l, filters=n_filters[2])

l = residual_block(l, increase_dim=True, filters=n_filters[3])
for _ in range(1, (wrn_n + 2)):
    l = residual_block(l, filters=n_filters[3])

bn_post_conv = BatchNormLayer(l)
bn_post_relu = NonlinearityLayer(bn_post_conv, rectify)
avg_pool = GlobalPoolLayer(bn_post_relu)
dense_layer = DenseLayer(avg_pool, num_units=128, W=HeNormal(gain='relu'), nonlinearity=rectify)

# Absolute difference between the embeddings of the two batch halves (the two
# images of each pair are stacked into one batch). Note the // integer
# division, so the slice index stays an integer under Python 3.
dist_layer = ExpressionLayer(dense_layer,
                             lambda I: T.abs_(I[:I.shape[0] // 2] - I[I.shape[0] // 2:]),
                             output_shape='auto')
l_y = DenseLayer(dist_layer, num_units=1, nonlinearity=sigmoid)

prediction = get_output(l_y)
prediction_clean = get_output(l_y, deterministic=True)

loss = T.mean(binary_crossentropy(prediction, y))
accuracy = T.mean(binary_accuracy(prediction_clean, y))
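# A minimal sketch of compiling the training and validation steps for this
# siamese setup, reusing X, y, loss, and accuracy from above; the optimizer
# and learning rate are assumptions:
import theano
import lasagne

params = lasagne.layers.get_all_params(l_y, trainable=True)
updates = lasagne.updates.adam(loss, params, learning_rate=1e-3)
train_fn = theano.function([X, y], loss, updates=updates)
val_fn = theano.function([X, y], accuracy)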
def cifar_model(cls, n=31, incoming=None, classes=10, **kwargs):
    r"""Create a densenet for the CIFAR or SVHN datasets.

    Parameters
    ----------
    n : integer (``31``)
        A parameter to control the length of the network.
    classes : integer (``10``)
        Number of classes, usually ``10`` or ``100``.
    incoming : a :class:`Layer` instance or ``None``
        The input layer for the densenet, if ``None`` a new layer will be
        created.
    growth : integer (``40``)
        The growth factor of the densenet. This is equivalent to the
        parameter $k$ in the paper.
    bottleneck : boolean (``True``)
        If ``True`` the two layer bottleneck approach is used.
    neck_size : integer or ``None`` (``None``)
        In case of bottlenecking the first layer will return ``neck_size``
        number of channels. If it is ``None`` the neck size will be set to
        four times the growth rate ($4 \cdot k$).
    compression : number (float) in [0, 1] or integer (``0``)
        The amount of compression to use. If this parameter is a number in
        [0, 1) it will be interpreted as a compression rate and will be
        equivalent to $1 - \theta$, where $\theta$ is the parameter from the
        paper. A compression rate of ``0.5`` will halve the number of
        channels in the transition, ``0`` will perform no compression. If
        this number is an integer it describes the number of output channels
        for the transition.
    dropout : number (float) in [0, 1] (``0``)
        The dropout probability. If it is ``0``, no dropout is performed.

    Returns
    -------
    a :class:`DenseLayer` instance
        The model in form of its last layer.
    """
    model = incoming or InputLayer(shape=(None, 3, 32, 32))
    builder = cls(model, **kwargs)
    if builder.bottleneck and builder.compression:
        model = builder.convolution(model, 2 * builder.growth, init_gain=1.0)
    else:
        model = builder.convolution(model, 16, init_gain=1.0)
    builder.current = builder.batchnorm_pt1(model)
    builder.add_dense_block(n)
    builder.transition()
    builder.add_dense_block(n)
    builder.transition()
    builder.add_dense_block(n)
    model = builder.batchnorm_pt2(builder.current)
    model = builder.nonlinearity(model)
    model = GlobalPoolLayer(model)
    model = DenseLayer(model, num_units=classes, W=HeNormal(gain='relu'),
                       nonlinearity=softmax)
    return model
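# Usage sketch, assuming this classmethod lives on a densenet builder class;
# the class name DenseNetBuilder is hypothetical, and the keyword arguments
# simply mirror the docstring above:
model = DenseNetBuilder.cifar_model(n=31, classes=10, growth=40,
                                    bottleneck=True, compression=0.5,
                                    dropout=0.2)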
def build_densenet(input_var, input_shape=(None, 3, 224, 224),
                   num_filters_init=64, growth_rate=32, dropout=0.2,
                   num_classes=1000, stages=[6, 12, 24, 16]):

    if input_shape[2] % (2**len(stages)) != 0:
        raise ValueError("input_shape[2] must be a multiple of {}.".format(2**len(stages)))

    if input_shape[3] % (2**len(stages)) != 0:
        raise ValueError("input_shape[3] must be a multiple of {}.".format(2**len(stages)))

    # Input should be (BATCH_SIZE, NUM_CHANNELS, WIDTH, HEIGHT)
    # NUM_CHANNELS is usually 3 (R,G,B); for the ImageNet example the width and height are 224
    network = InputLayer(input_shape, input_var)

    # Apply a 2D convolution with a 7x7 filter (pad by 3 on each side)
    # Because of the 2x2 stride, the last two dimensions will be half the size of the input (112x112)
    network = Conv2DLayer(network, num_filters=num_filters_init, filter_size=(7, 7),
                          stride=(2, 2), pad=(3, 3), W=HeNormal(gain='relu'), b=None,
                          flip_filters=False, nonlinearity=None)

    # Batch normalize
    network = BatchNormLayer(network, beta=None, gamma=None)

    # If dropout is enabled, apply after every convolutional and dense layer
    if dropout > 0:
        network = DropoutLayer(network, p=dropout)

    # Apply ReLU
    network = NonlinearityLayer(network, nonlinearity=rectify)

    # Keep the maximum value of a 3x3 pool with a 2x2 stride
    # This operation again halves the size of the last two dimensions (56x56)
    network = MaxPool2DLayer(network, pool_size=(3, 3), stride=(2, 2), pad=(1, 1))

    # Add dense blocks
    for i, num_layers in enumerate(stages):
        # Except for the first block, we add a transition layer before the
        # dense block that halves the number of filters, width and height
        if i > 0:
            network = add_transition(network, math.floor(network.output_shape[1] / 2), dropout)
        network = build_block(network, num_layers, growth_rate, dropout)

    # Apply global pooling and add a fully connected layer with softmax function
    network = ScaleLayer(network)
    network = BiasLayer(network)
    network = NonlinearityLayer(network, nonlinearity=rectify)
    network = GlobalPoolLayer(network)
    network = DenseLayer(network, num_units=num_classes, W=HeNormal(gain=1),
                         nonlinearity=softmax)

    return network
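# build_block and add_transition are assumed above. A minimal sketch following
# the DenseNet paper (BN -> ReLU -> conv composite, channel concatenation,
# 1x1-conv + 2x2 average-pool transition); the exact originals are not shown,
# so initializers and pooling mode are assumptions:
def bn_relu_conv(network, num_filters, filter_size, dropout):
    # Composite function: BN -> ReLU -> conv (-> dropout).
    network = BatchNormLayer(network)
    network = NonlinearityLayer(network, nonlinearity=rectify)
    network = Conv2DLayer(network, num_filters=num_filters, filter_size=filter_size,
                          pad='same', W=HeNormal(gain='relu'), b=None, nonlinearity=None)
    if dropout > 0:
        network = DropoutLayer(network, p=dropout)
    return network

def build_block(network, num_layers, growth_rate, dropout):
    # Each layer sees the concatenation of all previous feature maps.
    for _ in range(num_layers):
        conv = bn_relu_conv(network, growth_rate, (3, 3), dropout)
        network = ConcatLayer([network, conv], axis=1)
    return network

def add_transition(network, num_filters, dropout):
    # 1x1 conv to compress channels, then 2x2 average pooling.
    network = bn_relu_conv(network, num_filters, (1, 1), dropout)
    return Pool2DLayer(network, pool_size=(2, 2), stride=(2, 2), mode='average_inc_pad')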
def build_network():
    net = {}
    net['input'] = InputLayer((None, 3, 299, 299))
    net['conv'] = bn_conv(net['input'], num_filters=32, filter_size=3, stride=2)
    net['conv_1'] = bn_conv(net['conv'], num_filters=32, filter_size=3)
    net['conv_2'] = bn_conv(net['conv_1'], num_filters=64, filter_size=3, pad=1)
    net['pool'] = Pool2DLayer(net['conv_2'], pool_size=3, stride=2, mode='max')
    net['conv_3'] = bn_conv(net['pool'], num_filters=80, filter_size=1)
    net['conv_4'] = bn_conv(net['conv_3'], num_filters=192, filter_size=3)
    net['pool_1'] = Pool2DLayer(net['conv_4'], pool_size=3, stride=2, mode='max')
    net['mixed/join'] = inceptionA(net['pool_1'],
                                   nfilt=((64, ), (48, 64), (64, 96, 96), (32, )))
    net['mixed_1/join'] = inceptionA(net['mixed/join'],
                                     nfilt=((64, ), (48, 64), (64, 96, 96), (64, )))
    net['mixed_2/join'] = inceptionA(net['mixed_1/join'],
                                     nfilt=((64, ), (48, 64), (64, 96, 96), (64, )))
    net['mixed_3/join'] = inceptionB(net['mixed_2/join'],
                                     nfilt=((384, ), (64, 96, 96)))
    net['mixed_4/join'] = inceptionC(net['mixed_3/join'],
                                     nfilt=((192, ), (128, 128, 192),
                                            (128, 128, 128, 128, 192), (192, )))
    net['mixed_5/join'] = inceptionC(net['mixed_4/join'],
                                     nfilt=((192, ), (160, 160, 192),
                                            (160, 160, 160, 160, 192), (192, )))
    net['mixed_6/join'] = inceptionC(net['mixed_5/join'],
                                     nfilt=((192, ), (160, 160, 192),
                                            (160, 160, 160, 160, 192), (192, )))
    net['mixed_7/join'] = inceptionC(net['mixed_6/join'],
                                     nfilt=((192, ), (192, 192, 192),
                                            (192, 192, 192, 192, 192), (192, )))
    net['mixed_8/join'] = inceptionD(net['mixed_7/join'],
                                     nfilt=((192, 320), (192, 192, 192, 192)))
    net['mixed_9/join'] = inceptionE(net['mixed_8/join'],
                                     nfilt=((320, ), (384, 384, 384),
                                            (448, 384, 384, 384), (192, )),
                                     pool_mode='average_exc_pad')
    net['mixed_10/join'] = inceptionE(net['mixed_9/join'],
                                      nfilt=((320, ), (384, 384, 384),
                                             (448, 384, 384, 384), (192, )),
                                      pool_mode='max')
    net['pool3'] = GlobalPoolLayer(net['mixed_10/join'])
    net['softmax'] = DenseLayer(net['pool3'], num_units=1008, nonlinearity=softmax)

    return net
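# bn_conv is assumed above. A minimal sketch consistent with the Lasagne
# Recipes Inception-v3 model (convolution followed by batch norm with the
# TensorFlow epsilon); treat the exact keyword handling as an assumption:
def bn_conv(input_layer, **kwargs):
    l = Conv2DLayer(input_layer, b=None, nonlinearity=None, **kwargs)
    l = batch_norm(l, epsilon=0.001)
    return l

# Pretrained weights could then be restored with set_all_param_values; the
# file name and pickle layout here are assumptions:
import pickle
import lasagne

net = build_network()
with open('inception_v3.pkl', 'rb') as f:
    model = pickle.load(f)
lasagne.layers.set_all_param_values(net['softmax'], model['param values'])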
def build_densenet(input_shape=(None, 3, 32, 32), input_var=None, classes=10,
                   depth=40, first_output=16, growth_rate=12, num_blocks=3,
                   dropout=0):
    """
    Creates a DenseNet model in Lasagne.

    Parameters
    ----------
    input_shape : tuple
        The shape of the input layer, as ``(batchsize, channels, rows, cols)``.
        Any entry except ``channels`` can be ``None`` to indicate free size.
    input_var : Theano expression or None
        Symbolic input variable. Will be created automatically if not given.
    classes : int
        The number of classes of the softmax output.
    depth : int
        Depth of the network. Must be ``num_blocks * n + 1`` for some ``n``.
        (Parameterizing by depth rather than n makes it easier to follow the
        paper.)
    first_output : int
        Number of channels of initial convolution before entering the first
        dense block, should be of comparable size to `growth_rate`.
    growth_rate : int
        Number of feature maps added per layer.
    num_blocks : int
        Number of dense blocks (defaults to 3, as in the original paper).
    dropout : float
        The dropout rate. Set to zero (the default) to disable dropout.

    Returns
    -------
    network : Layer instance
        Lasagne Layer instance for the output layer.

    References
    ----------
    .. [1] Gao Huang et al. (2016): Densely Connected Convolutional Networks.
       https://arxiv.org/abs/1608.06993
    """
    if (depth - 1) % num_blocks != 0:
        raise ValueError("depth must be num_blocks * n + 1 for some n")

    # input and initial convolution
    network = InputLayer(input_shape, input_var, name='input')
    network = Conv2DLayer(network, first_output, 3, pad='same',
                          W=lasagne.init.HeNormal(gain='relu'),
                          b=None, nonlinearity=None, name='pre_conv')
    # note: The authors' implementation does *not* have a dropout after the
    # initial convolution. This was missing in the paper, but important.
    # if dropout:
    #     network = DropoutLayer(network, dropout)

    # dense blocks with transitions in between
    n = (depth - 1) // num_blocks
    for b in range(num_blocks):
        # if b == 0:
        #     block_size = n - 1
        # else:
        #     block_size = n - 2
        network = dense_block(network, n - 1, (2**b) * growth_rate, dropout,
                              name_prefix='block%d' % (b + 1))
        if b < num_blocks - 1:
            network = transition(network, dropout,
                                 name_prefix='block%d_trs' % (b + 1))

    # post processing until prediction
    network = BatchNormLayer(network, name='last_bn')
    network = NonlinearityLayer(network, nonlinearity=rectify, name='last_relu')
    network = Conv2DLayer(network, network.output_shape[1], 3, stride=1, pad='same',
                          W=lasagne.init.HeNormal(gain='relu'), b=None,
                          nonlinearity=None, name='last_conv')
    if dropout:
        network = DropoutLayer(network, dropout)
    network = BatchNormLayer(network, name='post_bn')
    network = NonlinearityLayer(network, nonlinearity=rectify, name='post_relu')
    network = GlobalPoolLayer(network, name='post_pool')
    network = DenseLayer(network, classes, nonlinearity=softmax,
                         W=lasagne.init.HeNormal(gain=1), name='output')
    return network
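# With the defaults (depth=40, num_blocks=3), n = (40 - 1) // 3 = 13, so each
# dense block contributes n - 1 = 12 convolutional layers. A quick usage
# sketch; the symbolic variable is an assumption:
import theano.tensor as T

input_var = T.tensor4('inputs')
network = build_densenet(input_shape=(None, 3, 32, 32), input_var=input_var,
                         classes=10, depth=40, growth_rate=12)
print(lasagne.layers.count_params(network, trainable=True))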
def ResNet_BottleNeck_FullPreActivation(input_var=None, window_length=100, feat_dim=23, n=18):
    '''
    Adapted from https://github.com/Lasagne/Recipes/tree/master/papers/deep_residual_learning.
    Tweaked to be consistent with 'Identity Mappings in Deep Residual Networks',
    Kaiming He et al. 2016 (https://arxiv.org/abs/1603.05027), judging from
    https://github.com/KaimingHe/resnet-1k-layers/blob/master/resnet-pre-act.lua.

    The number of filters goes 16 -> 64 -> 128 -> 256.
    Formula to figure out depth: 9n + 2
    '''

    # create a residual learning building block with two stacked 3x3 convlayers as in paper
    def residual_bottleneck_block(l, increase_dim=False, first=False):
        input_num_filters = l.output_shape[1]

        if increase_dim:
            first_stride = (2, 2)
            out_num_filters = input_num_filters * 2
        else:
            first_stride = (1, 1)
            out_num_filters = input_num_filters

        if first:
            # hacky solution to keep layers correct
            bn_pre_relu = l
            out_num_filters = out_num_filters * 4
        else:
            # contains the BN -> ReLU portion, steps 1 to 2
            bn_pre_conv = BatchNormLayer(l)
            bn_pre_relu = NonlinearityLayer(bn_pre_conv, rectify)

        # integer division so the filter count stays an int under Python 3
        bottleneck_filters = out_num_filters // 4

        # contains the weight -> BN -> ReLU portion, steps 3 to 5
        conv_1 = batch_norm(ConvLayer(bn_pre_relu, num_filters=bottleneck_filters,
                                      filter_size=(1, 1), stride=(1, 1),
                                      nonlinearity=rectify, pad='same', W=he_norm))
        conv_2 = batch_norm(ConvLayer(conv_1, num_filters=bottleneck_filters,
                                      filter_size=(3, 3), stride=first_stride,
                                      nonlinearity=rectify, pad='same', W=he_norm))

        # contains the last weight portion, step 6
        conv_3 = ConvLayer(conv_2, num_filters=out_num_filters, filter_size=(1, 1),
                           stride=(1, 1), nonlinearity=None, pad='same', W=he_norm)

        if increase_dim:
            # projection shortcut, as option B in paper
            projection = ConvLayer(bn_pre_relu, num_filters=out_num_filters,
                                   filter_size=(1, 1), stride=(2, 2),
                                   nonlinearity=None, pad='same', b=None)
            block = ElemwiseSumLayer([conv_3, projection])
        elif first:
            # projection shortcut, as option B in paper
            projection = ConvLayer(l, num_filters=out_num_filters, filter_size=(1, 1),
                                   stride=(1, 1), nonlinearity=None, pad='same', b=None)
            block = ElemwiseSumLayer([conv_3, projection])
        else:
            block = ElemwiseSumLayer([conv_3, l])

        return block

    # Building the network
    l_in = InputLayer(shape=(None, 3, window_length, feat_dim), input_var=input_var)

    # first layer, output is 16x16x16
    l = batch_norm(ConvLayer(l_in, num_filters=16, filter_size=(3, 3), stride=(1, 1),
                             nonlinearity=rectify, pad='same', W=he_norm))

    # first stack of residual blocks, output is 64x16x16
    l = residual_bottleneck_block(l, first=True)
    for _ in range(1, n):
        l = residual_bottleneck_block(l)

    # second stack of residual blocks, output is 128x8x8
    l = residual_bottleneck_block(l, increase_dim=True)
    for _ in range(1, n):
        l = residual_bottleneck_block(l)

    # third stack of residual blocks, output is 256x4x4
    l = residual_bottleneck_block(l, increase_dim=True)
    for _ in range(1, n):
        l = residual_bottleneck_block(l)

    bn_post_conv = BatchNormLayer(l)
    bn_post_relu = NonlinearityLayer(bn_post_conv, rectify)

    # average pooling
    avg_pool = GlobalPoolLayer(bn_post_relu)

    # fully connected layer
    # network = DenseLayer(avg_pool, num_units=10, W=HeNormal(), nonlinearity=softmax)
    network = DenseLayer(avg_pool, num_units=1, W=HeNormal(), nonlinearity=sigmoid)

    return network
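# Worked example of the 9n + 2 depth formula: the default n=18 yields a
# 9 * 18 + 2 = 164-layer network. A usage sketch for the binary (sigmoid)
# output head; the symbolic variable is an assumption:
import theano.tensor as T

input_var = T.tensor4('inputs')
network = ResNet_BottleNeck_FullPreActivation(input_var, window_length=100,
                                              feat_dim=23, n=18)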
def ResNet_FullPre_Wide(input_var=None, nout=10, n=3, k=2, dropoutrate=0):
    '''
    Adapted from https://gist.github.com/FlorianMuellerklein/3d9ba175038a3f2e7de3794fa303f1ee,
    which was tweaked to be consistent with 'Identity Mappings in Deep Residual Networks',
    Kaiming He et al. 2016 (https://arxiv.org/abs/1603.05027), and
    'Wide Residual Networks', Sergey Zagoruyko, Nikos Komodakis 2016
    (http://arxiv.org/pdf/1605.07146v1.pdf)
    '''
    n_filters = {0: 16, 1: 16 * k, 2: 32 * k, 3: 64 * k}

    # create a residual learning building block with two stacked 3x3 convlayers and dropout
    def residual_block(l, increase_dim=False, first=False, filters=16):
        if increase_dim:
            first_stride = (2, 2)
        else:
            first_stride = (1, 1)

        if first:
            # hacky solution to keep layers correct
            bn_pre_relu = l
        else:
            # contains the BN -> ReLU portion, steps 1 to 2
            bn_pre_conv = BatchNormLayer(l)
            bn_pre_relu = NonlinearityLayer(bn_pre_conv, rectify)

        # contains the weight -> BN -> ReLU portion, steps 3 to 5
        conv_1 = batch_norm(ConvLayer(bn_pre_relu, num_filters=filters,
                                      filter_size=(3, 3), stride=first_stride,
                                      nonlinearity=rectify, pad='same',
                                      W=HeNormal(gain='relu')))

        if dropoutrate > 0:  # with dropout
            dropout = DropoutLayer(conv_1, p=dropoutrate)

            # contains the last weight portion, step 6
            conv_2 = ConvLayer(dropout, num_filters=filters, filter_size=(3, 3),
                               stride=(1, 1), nonlinearity=None, pad='same',
                               W=HeNormal(gain='relu'))
        else:  # without dropout
            conv_2 = ConvLayer(conv_1, num_filters=filters, filter_size=(3, 3),
                               stride=(1, 1), nonlinearity=None, pad='same',
                               W=HeNormal(gain='relu'))

        # add shortcut connections
        if increase_dim:
            # projection shortcut, as option B in paper
            projection = ConvLayer(l, num_filters=filters, filter_size=(1, 1),
                                   stride=(2, 2), nonlinearity=None, pad='same', b=None)
            block = ElemwiseSumLayer([conv_2, projection])
        elif first:
            # projection shortcut, as option B in paper
            projection = ConvLayer(l, num_filters=filters, filter_size=(1, 1),
                                   stride=(1, 1), nonlinearity=None, pad='same', b=None)
            block = ElemwiseSumLayer([conv_2, projection])
        else:
            block = ElemwiseSumLayer([conv_2, l])

        return block

    # Building the network
    l_in = InputLayer(shape=(None, 3, 32, 32), input_var=input_var)

    # first layer
    l = batch_norm(ConvLayer(l_in, num_filters=n_filters[0], filter_size=(3, 3),
                             stride=(1, 1), nonlinearity=rectify, pad='same',
                             W=HeNormal(gain='relu')))

    # first stack of residual blocks
    l = residual_block(l, first=True, filters=n_filters[1])
    for _ in range(1, n):
        l = residual_block(l, filters=n_filters[1])

    # second stack of residual blocks
    l = residual_block(l, increase_dim=True, filters=n_filters[2])
    for _ in range(1, n):
        l = residual_block(l, filters=n_filters[2])

    # third stack of residual blocks
    l = residual_block(l, increase_dim=True, filters=n_filters[3])
    for _ in range(1, n):
        l = residual_block(l, filters=n_filters[3])

    bn_post_conv = BatchNormLayer(l)
    bn_post_relu = NonlinearityLayer(bn_post_conv, rectify)

    # average pooling
    avg_pool = GlobalPoolLayer(bn_post_relu)

    # fully connected layer
    network = DenseLayer(avg_pool, num_units=nout, W=HeNormal(), nonlinearity=softmax)

    return network
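# With the usual 6n + 4 depth convention for wide residual networks, the
# defaults n=3, k=2 correspond roughly to a WRN-22-2. A usage sketch; the
# symbolic variables and loss are assumptions:
import theano.tensor as T
import lasagne

input_var = T.tensor4('inputs')
target_var = T.ivector('targets')
network = ResNet_FullPre_Wide(input_var, nout=10, n=3, k=2)
prediction = lasagne.layers.get_output(network)
loss = lasagne.objectives.categorical_crossentropy(prediction, target_var).mean()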