def __init__(self, overlapping_classes=None, exclusive_classes=None,
             analytics_input=True, network_type='conv_net', num_words=60,
             width=100, lookup_size=0, lookup_dim=0, optimizer=None):
    """
    Assemble the classifier layers and pass them to the parent constructor.

    At least one of ``overlapping_classes`` / ``exclusive_classes`` must be
    given. With only one of them the network gets a single output layer;
    with both, the output is a two-branch ``Tree`` sharing one BranchNode.

    Args:
        overlapping_classes: sized collection of classes for the Logistic
            output head, or None to omit that head (only ``len()`` is used).
        exclusive_classes: sized collection of classes for the Softmax
            output head, or None to omit that head (only ``len()`` is used).
        analytics_input: forwarded unchanged to ``self.input_layers``.
        network_type (str): 'lstm' sets ``self.recurrent`` to True; any
            other value sets it to False.
        num_words, width, lookup_size, lookup_dim: stored on the instance
            unchanged (presumably consumed by ``self.input_layers`` --
            confirm there).
        optimizer: optimizer handed to the parent constructor. When None
            (the default) a new ``Adam()`` is created for this instance.
    """
    assert (overlapping_classes is not None) or (exclusive_classes is not None)

    # A default of ``Adam()`` in the signature would be evaluated once and
    # shared by every network; build a fresh optimizer per instance instead.
    if optimizer is None:
        optimizer = Adam()

    self.width = width
    self.num_words = num_words
    self.overlapping_classes = overlapping_classes
    self.exclusive_classes = exclusive_classes
    self.analytics_input = analytics_input
    self.recurrent = network_type == 'lstm'
    self.lookup_size = lookup_size
    self.lookup_dim = lookup_dim

    init = GlorotUniform()
    activation = Rectlin(slope=1E-05)
    gate = Logistic()

    input_layers = self.input_layers(analytics_input, init, activation, gate)

    if self.overlapping_classes is None:
        # exclusive classes only: one Softmax head
        output_layers = [
            Affine(len(self.exclusive_classes), init, activation=Softmax())
        ]
    elif self.exclusive_classes is None:
        # overlapping classes only: one Logistic head
        output_layers = [
            Affine(len(self.overlapping_classes), init, activation=Logistic())
        ]
    else:
        # both kinds of classes: two heads fed from the same branch node
        output_branch = BranchNode(name='exclusive_overlapping')
        output_layers = Tree([
            [
                SkipNode(),
                output_branch,
                Affine(len(self.exclusive_classes), init, activation=Softmax())
            ],
            [
                output_branch,
                Affine(len(self.overlapping_classes), init, activation=Logistic())
            ]
        ])

    layers = [
        input_layers,
        # this is where inputs meet, and where we may want to add depth or
        # additional functionality
        Dropout(keep=0.8),
        output_layers
    ]
    super(ClassifierNetwork, self).__init__(layers, optimizer=optimizer)
def run(train, test):
    """
    Build the conv + bidirectional-RNN classifier, train it and return it.

    Reads the module-level ``common`` (for ``nclasses``) and ``args``
    (for ``callback_args`` and ``epochs``).

    Args:
        train: training dataset passed to ``Model.fit``.
        test: dataset used as the callbacks' evaluation set.

    Returns:
        Model: the fitted model.
    """
    weight_init = Gaussian(scale=0.01)

    network = []
    # convolutional front end
    network.append(Conv((3, 3, 128), init=weight_init, activation=Rectlin(),
                        strides=dict(str_h=1, str_w=2)))
    network.append(Conv((3, 3, 256), init=weight_init, batch_norm=True,
                        activation=Rectlin()))
    network.append(Pooling(2, strides=2))
    network.append(Conv((2, 2, 512), init=weight_init, batch_norm=True,
                        activation=Rectlin()))
    # recurrent stack; only the last time step is passed on
    network.append(DeepBiRNN(256, init=weight_init, activation=Rectlin(),
                             reset_cells=True, depth=3))
    network.append(RecurrentLast())
    # classifier head
    network.append(Affine(32, init=weight_init, batch_norm=True,
                          activation=Rectlin()))
    network.append(Affine(nout=common['nclasses'], init=weight_init,
                          activation=Softmax()))

    model = Model(layers=network)
    optimizer = Adadelta()
    error_metric = Misclassification()
    callbacks = Callbacks(model, eval_set=test, metric=error_metric,
                          **args.callback_args)
    # NOTE(review): the Softmax output is paired with CrossEntropyBinary
    # here -- confirm this is intended rather than CrossEntropyMulti.
    loss = GeneralizedCost(costfunc=CrossEntropyBinary())

    model.fit(train,
              optimizer=optimizer,
              num_epochs=args.epochs,
              cost=loss,
              callbacks=callbacks)
    return model
def create_model(vocab_size, rlayer_type):
    """
    Create the two-stream LSTM/GRU model for the bAbI dataset.

    Args:
        vocab_size (int) : Vocabulary size; used for both lookup tables and
                           as the width of the final Softmax layer.
        rlayer_type (string) : 'gru' selects a GRU layer; any other value
                               selects an LSTM layer. Only the exact value
                               'lstm' swaps the activation functions.

    Returns:
        Model : Model of the created network
    """
    # pick the recurrent layer class
    if rlayer_type == 'gru':
        recurrent_cls = GRU
    else:
        recurrent_cls = LSTM

    recurrent_kwargs = {
        'output_size': 100,
        'reset_cells': True,
        'init': GlorotUniform(),
        'init_inner': Orthonormal(0.5),
        'activation': Tanh(),
        'gate_activation': Logistic(),
    }
    # for lstm the two activation functions trade places
    if rlayer_type == 'lstm':
        recurrent_kwargs['activation'] = Logistic()
        recurrent_kwargs['gate_activation'] = Tanh()

    embedding_kwargs = {
        'vocab_size': vocab_size,
        'embedding_dim': 50,
        'init': Uniform(-0.05, 0.05),
    }

    def encoder_path():
        # embedding lookup followed by one recurrent layer
        return [LookupTable(**embedding_kwargs),
                recurrent_cls(**recurrent_kwargs)]

    story_path = encoder_path()
    query_path = encoder_path()

    merged = MergeMultistream(layers=[story_path, query_path], merge="stack")
    answer = Affine(vocab_size, init=GlorotUniform(), activation=Softmax())
    return Model(layers=[merged, answer])
def create_network():
    """
    Build the 3D-convolution network: five Conv/Pooling pairs followed by
    two 2048-unit Affine+Dropout stages and a 101-way Softmax layer.

    Returns:
        Model: the assembled (untrained) model.
    """
    # weight / bias initializers shared between layers
    gauss_01 = Gaussian(scale=0.01)
    gauss_005 = Gaussian(scale=0.005)
    zero_bias = Constant(0)
    one_bias = Constant(1)

    pad_all = dict(pad_d=1, pad_h=1, pad_w=1)
    stride_all = dict(str_d=2, str_h=2, str_w=2)

    # first pair: zero bias, and pooling that keeps the depth axis
    net = [
        Conv((3, 3, 3, 64), padding=pad_all, init=gauss_01, bias=zero_bias,
             activation=Rectlin()),
        Pooling((1, 2, 2), strides=dict(str_d=1, str_h=2, str_w=2)),
    ]

    # remaining pairs: unit bias, pooling with stride 2 on every axis
    for n_maps in (128, 256, 256, 256):
        net.append(Conv((3, 3, 3, n_maps), padding=pad_all, init=gauss_01,
                        bias=one_bias, activation=Rectlin()))
        net.append(Pooling((2, 2, 2), strides=stride_all))

    # fully connected part
    for _ in range(2):
        net.append(Affine(nout=2048, init=gauss_005, bias=one_bias,
                          activation=Rectlin()))
        net.append(Dropout(keep=0.5))
    net.append(Affine(nout=101, init=gauss_01, bias=zero_bias,
                      activation=Softmax()))

    return Model(layers=net)
def layers(self):
    """
    Return the layer list: a DOG layer, two Conv/Pooling pairs, a 500-unit
    Affine layer and an output layer of ``self.noutputs`` units.

    The output activation is Softmax when ``self.use_softmax`` is truthy,
    otherwise ``Logistic(shortcut=True)``.
    """
    uniform_init = Uniform(low=-0.1, high=0.1)
    use_bn = False

    stack = [DOG((5.0, 4.0, 3.0, 1.6), 1.8)]
    for n_maps in (16, 32):
        stack.append(Conv((5, 5, n_maps), init=uniform_init,
                          activation=Rectlin(), batch_norm=use_bn))
        stack.append(Pooling((2, 2)))
    stack.append(Affine(nout=500, init=uniform_init, activation=Rectlin(),
                        batch_norm=use_bn))

    # output layer
    n_out = self.noutputs
    out_bias = Constant(0)
    if self.use_softmax:
        out_activation = Softmax()
    else:
        out_activation = Logistic(shortcut=True)
    stack.append(Affine(nout=n_out, init=uniform_init, bias=out_bias,
                        activation=out_activation))
    return stack
def main_branch(branch_nodes):
    """
    Return the layer list of the main path of the inception network.

    Relies on the module-level ``common``, ``commonp1``, ``pool3s2p1``,
    ``init1`` and ``inception`` definitions.

    Args:
        branch_nodes: indexable holding at least two branch nodes; element 0
            is placed after 'inception_4a/' and element 1 after
            'inception_4d/'.

    Returns:
        list: layers ending in the 1000-way 'loss3/classifier' Softmax layer.
    """
    # stem: two conv stages, each followed by pooling
    path = [
        Conv((7, 7, 64), padding=3, strides=2, name='conv1/7x7_s2', **common),
        Pooling(name="pool1/3x3_s2", **pool3s2p1),
        Conv((1, 1, 64), name='conv2/3x3_reduce', **common),
        Conv((3, 3, 192), name="conv2/3x3", **commonp1),
        Pooling(name="pool2/3x3_s2", **pool3s2p1),
    ]

    # inception stage 3
    path.append(inception([(64, ), (96, 128), (16, 32), (32, )],
                          name='inception_3a/'))
    path.append(inception([(128, ), (128, 192), (32, 96), (64, )],
                          name='inception_3b/'))
    path.append(Pooling(name='pool3/3x3_s2', **pool3s2p1))

    # inception stage 4, with the two branch nodes spliced in
    path.append(inception([(192, ), (96, 208), (16, 48), (64, )],
                          name='inception_4a/'))
    path.append(branch_nodes[0])
    path.append(inception([(160, ), (112, 224), (24, 64), (64, )],
                          name='inception_4b/'))
    path.append(inception([(128, ), (128, 256), (24, 64), (64, )],
                          name='inception_4c/'))
    path.append(inception([(112, ), (144, 288), (32, 64), (64, )],
                          name='inception_4d/'))
    path.append(branch_nodes[1])
    path.append(inception([(256, ), (160, 320), (32, 128), (128, )],
                          name='inception_4e/'))
    path.append(Pooling(name='pool4/3x3_s2', **pool3s2p1))

    # inception stage 5
    path.append(inception([(256, ), (160, 320), (32, 128), (128, )],
                          name='inception_5a/'))
    path.append(inception([(384, ), (192, 384), (48, 128), (128, )],
                          name="inception_5b/"))

    # average pooling and classifier
    path.append(Pooling(fshape=7, strides=1, op="avg", name='pool5/7x7_s1'))
    path.append(Affine(nout=1000, init=init1, activation=Softmax(),
                       bias=Constant(0), name='loss3/classifier'))
    return path
def __init__(self):
    """
    Build and initialize a two-input model: an image path and a sentence
    path merged with ``merge="recurrent"``, followed by Dropout, an LSTM
    and a Softmax Affine layer. All weights and biases use ``Constant(0)``.

    Sets ``in_shape``, ``layers``, ``cost`` and ``model`` on the instance.
    """
    self.in_shape = [1024, (2538, 38)]

    zeros = Constant(0)

    image_path = Sequential([
        Affine(20, zeros, bias=zeros),
        Affine(10, zeros, bias=zeros),
    ])
    sent_path = Sequential([
        Affine(30, zeros, bias=zeros),
        Affine(10, zeros),  # no bias on this layer
    ])

    merged = MergeMultistream(layers=[image_path, sent_path],
                              merge="recurrent")
    dropout = Dropout(keep=0.5)
    lstm = LSTM(4, zeros, activation=Logistic(), gate_activation=Tanh(),
                reset_cells=True)
    output = Affine(20, zeros, bias=zeros, activation=Softmax())

    stack = [merged, dropout, lstm, output]
    self.layers = stack
    self.cost = GeneralizedCostMask(CrossEntropyMulti())
    self.model = Model(layers=stack)
    self.model.initialize(self.in_shape, cost=self.cost)
def __init__(self, depth=9):
    """
    Build and initialize a residual network for a (3, 32, 32) input.

    The network has three stages of 16, 32 and 64 feature maps with
    ``depth`` residual modules each, then BatchNorm, Rectlin, global
    average pooling and a 10-way Softmax layer.

    Args:
        depth (int): number of residual modules per stage. The value is
            stored as ``self.depth`` and used to lay out the stages.

    Sets ``depth``, ``layers`` and ``model`` on the instance.
    """
    self.depth = depth
    in_shape = (3, 32, 32)

    # Feature-map count of every residual module, e.g. depth=2 gives
    # [16, 16, 32, 32, 64, 64]. ``depth`` must come from the argument;
    # re-binding it to a constant here would silently ignore the caller.
    nfms = [2**(stage + 4) for stage in sorted(list(range(3)) * depth)]
    # stride 2 wherever the feature-map count changes, otherwise 1
    strides = [
        1 if cur == prev else 2 for cur, prev in zip(nfms[1:], nfms[:-1])
    ]

    # Now construct the network
    layers = [Conv(**self.conv_params(3, 16))]
    layers.append(self.module_s1(nfms[0], True))

    for nfm, stride in zip(nfms[1:], strides):
        if stride == 1:
            res_module = self.module_s1(nfm)
        else:
            res_module = self.module_s2(nfm)
        layers.append(res_module)

    layers.append(BatchNorm())
    layers.append(Activation(Rectlin()))
    layers.append(Pooling('all', op='avg'))
    layers.append(
        Affine(10,
               init=Kaiming(local=False),
               batch_norm=True,
               activation=Softmax()))
    self.layers = layers

    model = Model(layers=layers)
    cost = GeneralizedCost(costfunc=CrossEntropyMulti())
    model.initialize(in_shape, cost=cost)
    self.model = model
def build(self):
    """
    Create ``self.model``: a 100-unit Rectlin Affine layer followed by a
    2-unit Softmax Affine layer. ``self.init`` initializes both the
    weights and the biases of each layer.
    """
    hidden = Affine(nout=100, init=self.init, bias=self.init,
                    activation=Rectlin())
    output = Affine(nout=2, init=self.init, bias=self.init,
                    activation=Softmax())

    # wrap the two layers in a model object
    self.model = Model(layers=[hidden, output])
def create_network():
    """
    Build a 3D convolutional network ending in a 2-way Softmax layer.

    Layout: one strided 9x9x9 Conv, two dilated Convs, a Pooling layer,
    eight 2x2x2 Convs, Dropout and the Affine output layer.

    Returns:
        Model: the assembled (untrained) model.
    """
    kaiming = Kaiming()
    pad_all = dict(pad_d=1, pad_h=1, pad_w=1)
    stride_all = dict(str_d=2, str_h=2, str_w=2)
    dilate_all = dict(dil_d=2, dil_h=2, dil_w=2)
    # keyword arguments shared by every batch-normalized Conv below
    bn_conv = dict(init=kaiming, batch_norm=True, activation=Rectlin())

    net = [
        Conv((9, 9, 9, 16), padding=pad_all, strides=stride_all,
             init=kaiming, activation=Rectlin()),
        Conv((5, 5, 5, 32), dilation=dilate_all, **bn_conv),
        Conv((3, 3, 3, 64), dilation=dilate_all, **bn_conv),
        Pooling((2, 2, 2), padding=pad_all, strides=stride_all),
    ]

    # chain of 2x2x2 convolutions with varying feature-map counts
    for n_maps in (128, 128, 128, 256, 1024, 4096, 2048, 1024):
        net.append(Conv((2, 2, 2, n_maps), **bn_conv))

    net.append(Dropout())
    net.append(Affine(2, init=Kaiming(local=False), batch_norm=True,
                      activation=Softmax()))
    return Model(layers=net)
def create_frcn_model(frcn_fine_tune=False):
    """
    Build the two-output model: a RoiPooling layer over the layers from
    ``add_vgg_layers()``, two 4096-unit Affine+Dropout stages, then a
    branch into a 21-way Softmax head and an 84-unit Identity head.

    Args:
        frcn_fine_tune (bool): passed to RoiPooling as ``bprop_enabled``.

    Returns:
        Model: model wrapping a ``Tree`` of the two paths.
    """
    branch = BranchNode(name="b1")
    imagenet_layers = add_vgg_layers()
    roi_hw = (7, 7)

    main_path = [
        RoiPooling(layers=imagenet_layers, HW=roi_hw,
                   bprop_enabled=frcn_fine_tune),
    ]
    for _ in range(2):
        main_path.append(Affine(nout=4096,
                                init=Gaussian(scale=0.005),
                                bias=Constant(.1),
                                activation=Rectlin()))
        main_path.append(Dropout(keep=0.5))
    main_path.append(branch)
    main_path.append(Affine(nout=21,
                            init=Gaussian(scale=0.01),
                            bias=Constant(0),
                            activation=Softmax()))

    # second head hanging off the same branch node
    side_path = [
        branch,
        Affine(nout=84,
               init=Gaussian(scale=0.001),
               bias=Constant(0),
               activation=Identity()),
    ]

    return Model(layers=Tree([main_path, side_path]))
def test_softmax_big_inputs(backend_default):
    """
    Compare the backend Softmax with a NumPy reference on a random input
    of 1000 units per batch item (requires a batch size of at least 128).
    """
    np.random.seed(1)
    backend = backend_default
    assert backend.bsz >= 128, 'This tests needs large batch size'

    softmax = Softmax()
    n_units = 1000  # 1000 input and output units to softmax

    # random host-side input, copied into a backend buffer
    host_input = np.random.random((n_units, backend.bsz))
    device_buf = backend.iobuf(n_units)
    device_buf[:] = host_input

    # NumPy reference; the per-column max is subtracted before exp()
    exps = np.exp(host_input - host_input.max(axis=0))
    expected = exps / exps.sum(axis=0)

    # in-place softmax on the backend buffer
    device_buf[:] = softmax(device_buf)

    assert allclose_with_out(expected, device_buf.get(), atol=0.0,
                             rtol=1.0e-5)
def create_network(stage_depth, bottleneck):
    """
    Build a four-stage residual network with a 1000-way Softmax output.

    Args:
        stage_depth (int): selects the number of residual modules per
            stage. Accepted values and the resulting stage sizes:
            0/18 -> (2, 2, 2, 2); 1/34/50 -> (3, 4, 6, 3);
            2/68/101 -> (3, 4, 23, 3); 3/102/152 -> (3, 8, 36, 3);
            4/98/138 -> (3, 7, 35, 3).
        bottleneck: forwarded unchanged to ``module_factory``.

    Returns:
        tuple: ``(Model, GeneralizedCost)`` using ``CrossEntropyMulti``.

    Raises:
        ValueError: if ``stage_depth`` is not one of the accepted values;
            the message includes the rejected value.
    """
    # (accepted stage_depth values, residual modules per stage)
    stage_table = (
        ((0, 18), (2, 2, 2, 2)),
        ((1, 34, 50), (3, 4, 6, 3)),
        ((2, 68, 101), (3, 4, 23, 3)),
        ((3, 102, 152), (3, 8, 36, 3)),
        ((4, 98, 138), (3, 7, 35, 3)),
    )
    for accepted, stages in stage_table:
        if stage_depth in accepted:
            break
    else:
        raise ValueError(
            'Invalid stage_depth value: {}'.format(stage_depth))

    layers = [Conv(**conv_params(7, 64, strides=2)), Pooling(3, strides=2)]

    # Structure of the deep residual part of the network:
    # each stage repeats its module count at feature map depths
    # of 64, 128, 256, 512
    nfms = list(
        itt.chain.from_iterable(
            itt.repeat(2**(x + 6), r) for x, r in enumerate(stages)))
    # -1 marks the very first module; afterwards stride 2 wherever the
    # feature-map count changes, otherwise 1
    strides = [-1] + [
        1 if cur == prev else 2 for cur, prev in zip(nfms[1:], nfms[:-1])
    ]

    for nfm, stride in zip(nfms, strides):
        layers.append(module_factory(nfm, bottleneck, stride))

    layers.append(Pooling('all', op='avg'))
    layers.append(Conv(**conv_params(1, 1000, relu=False)))
    layers.append(Activation(Softmax()))
    return Model(layers=layers), GeneralizedCost(costfunc=CrossEntropyMulti())
def create_network(stage_depth):
    """
    Build a three-stage residual network with a 10-way Softmax output.

    Args:
        stage_depth (int): number of residual modules in each of the three
            stages (feature map depths 16, 32 and 64).

    Returns:
        tuple: ``(Model, GeneralizedCost)`` using ``CrossEntropyMulti``.
    """
    # feature-map count per residual module: 16 * stage_depth, then 32, 64
    nfms = [2**(stage + 4) for stage in range(3) for _ in range(stage_depth)]
    # stride for every module after the first: 2 at a stage change, else 1
    strides = []
    for cur, prev in zip(nfms[1:], nfms):
        strides.append(1 if cur == prev else 2)

    # stem convolution and the first residual module
    net = [Conv(**conv_params(3, 16))]
    net.append(module_s1(nfms[0], True))

    # remaining residual modules
    for nfm, stride in zip(nfms[1:], strides):
        if stride == 1:
            net.append(module_s1(nfm))
        else:
            net.append(module_s2(nfm))

    # head
    net.append(BatchNorm())
    net.append(Activation(Rectlin()))
    net.append(Pooling('all', op='avg'))
    net.append(Affine(10, init=Kaiming(local=False), batch_norm=True,
                      activation=Softmax()))

    return Model(layers=net), GeneralizedCost(costfunc=CrossEntropyMulti())
def constuct_network():
    """
    Construct the RCNN model: four convolutional layers with pooling, an
    Affine + DropoutBinary stage, three LSTM layers, a second
    Affine + DropoutBinary stage and a 101-way Softmax output.

    Returns:
        Model: the assembled (untrained) model.
    """
    def conv(fshape, scale, bias, **extra):
        # Rectlin convolution with Gaussian weights and a constant bias
        return Conv(fshape, init=Gaussian(scale=scale), bias=Constant(bias),
                    activation=Rectlin(), **extra)

    def dense():
        # 4096-unit Rectlin layer with unit bias
        return Affine(nout=4096, init=Gaussian(scale=0.01), bias=Constant(1),
                      activation=Rectlin())

    net = []

    # convolutional feature extractor
    net.append(conv((11, 11, 64), 0.01, 0, padding=3, strides=4))
    net.append(Pooling(3, strides=2))
    net.append(conv((7, 7, 128), 0.01, 1, padding=2))
    net.append(Pooling(3, strides=2))
    net.append(conv((5, 5, 256), 0.03, 0, padding=1))
    net.append(conv((3, 3, 256), 0.03, 1, padding=1))
    net.append(Pooling(3, strides=2))

    net.append(dense())
    net.append(DropoutBinary(keep=0.5))

    # three identical recurrent layers
    for _ in range(3):
        net.append(LSTM(512, init=Gaussian(scale=0.03), activation=Rectlin(),
                        gate_activation=Tanh()))

    net.append(dense())
    net.append(DropoutBinary(keep=0.5))
    net.append(Affine(nout=101, init=Gaussian(scale=0.01), bias=Constant(-7),
                      activation=Softmax()))

    return Model(layers=net)
def __init__(self, num_classes, nms_threshold=0.45, nms_topk=400, topk=200,
             threshold=0.01, name=None):
    """
    Store the detection-output settings and create the Softmax used by
    the layer.

    Args:
        num_classes: stored as ``self.num_classes``.
        nms_threshold (float): stored as ``self.nms_threshold``
            (presumably the overlap threshold for non-maximum
            suppression -- confirm where it is consumed).
        nms_topk (int): stored as ``self.nms_topk``.
        topk (int): stored as ``self.topk``.
        threshold (float): stored as ``self.threshold`` (presumably a
            minimum confidence score -- confirm where it is consumed).
        name: passed to the parent constructor.
    """
    super(DetectionOutput, self).__init__(name)
    self.num_classes = num_classes
    self.nms_threshold = nms_threshold
    self.nms_topk = nms_topk
    self.topk = topk
    # honour the caller's value instead of a hard-coded 0.01
    self.threshold = threshold
    self.softmax = Softmax(axis=1)
def aux_branch(bnode):
    """
    Return the layer list of an auxiliary classifier attached at ``bnode``.

    Uses the module-level ``common``, ``init1``, ``relu`` and ``bias``.

    Args:
        bnode: branch node placed first in the returned list.

    Returns:
        list: branch node, average pooling, 1x1 Conv, 1024-unit Affine,
        Dropout and a 1000-way Softmax Affine layer.
    """
    branch = [bnode]
    branch.append(Pooling(fshape=5, strides=3, op="avg"))
    branch.append(Conv((1, 1, 128), **common))
    branch.append(Affine(nout=1024, init=init1, activation=relu, bias=bias))
    branch.append(Dropout(keep=0.3))
    branch.append(Affine(nout=1000, init=init1, activation=Softmax(),
                         bias=Constant(0)))
    return branch
def aux_branch(bnode, ind):
    """
    Return the named layer list of auxiliary classifier number ``ind``.

    Uses the module-level ``common``, ``init1``, ``relu`` and ``bias``.

    Args:
        bnode: branch node placed first in the returned list.
        ind (int): index used to build the 'loss<ind>/' name prefix.

    Returns:
        list: branch node, average pooling, 1x1 Conv, 1024-unit Affine,
        Dropout and a 1000-way Softmax Affine layer.
    """
    prefix = 'loss%d/' % ind

    pool = Pooling(fshape=5, strides=3, op="avg", name=prefix + 'ave_pool')
    conv = Conv((1, 1, 128), name=prefix + 'conv', **common)
    fc = Affine(nout=1024, init=init1, activation=relu, bias=bias,
                name=prefix + 'fc')
    # TODO put dropout back in (keep=1.0 leaves activations untouched)
    drop = Dropout(keep=1.0, name=prefix + 'drop_fc')
    classifier = Affine(nout=1000, init=init1, activation=Softmax(),
                        bias=Constant(0), name=prefix + 'classifier')

    return [bnode, pool, conv, fc, drop, classifier]
def prepare_model(ninputs=9600, nclass=5):
    """
    Set up the model architecture (logistic regression): a single Softmax
    Affine layer, together with its optimizer and cost.

    Args:
        ninputs (int): not used by this function.
        nclass (int): number of output units.

    Returns:
        tuple: ``(Model, Adam optimizer, GeneralizedCost)``.
    """
    classifier = Affine(nout=nclass,
                        init=Gaussian(loc=0.0, scale=0.01),
                        activation=Softmax())
    cost = GeneralizedCost(costfunc=CrossEntropyMulti())
    opt = Adam()
    lrmodel = Model(layers=[classifier])
    return lrmodel, opt, cost
def layers(self):
    """
    Return the layer list: five convolutions (the first two each followed
    by LRN and Pooling, the last by Pooling), two 4096-unit Identity
    Affine layers with Dropout, and an output layer of ``self.noutputs``
    units.

    The output activation is Softmax when ``self.use_softmax`` is truthy,
    otherwise ``Logistic(shortcut=True)``.
    """
    def conv(fshape, scale, pad):
        # stride-1 Rectlin convolution with zero bias
        return Conv(fshape, init=Gaussian(scale=scale), bias=Constant(0),
                    activation=Rectlin(), padding=pad, strides=1)

    def lrn():
        return LRN(31, ascale=0.001, bpower=0.75)

    def pool():
        return Pooling(3, strides=2, padding=1)

    stack = []

    stack.append(conv((7, 7, 96), 0.0001, 3))
    stack.append(lrn())
    stack.append(pool())

    stack.append(conv((5, 5, 256), 0.01, 2))
    stack.append(lrn())
    stack.append(pool())

    stack.append(conv((3, 3, 384), 0.01, 1))
    stack.append(conv((3, 3, 384), 0.01, 1))
    stack.append(conv((3, 3, 256), 0.01, 1))
    stack.append(pool())

    for _ in range(2):
        stack.append(Affine(nout=4096, init=Gaussian(scale=0.01),
                            bias=Constant(0), activation=Identity()))
        stack.append(Dropout(keep=0.5))

    # output layer
    n_out = self.noutputs
    out_init = Gaussian(scale=0.01)
    out_bias = Constant(0)
    if self.use_softmax:
        out_activation = Softmax()
    else:
        out_activation = Logistic(shortcut=True)
    stack.append(Affine(nout=n_out, init=out_init, bias=out_bias,
                        activation=out_activation))
    return stack
def fit_model(train_set, val_set, num_epochs=50):
    """
    Build a small three-convolution CNN with a 2-way Softmax output, train
    it and return it.

    Args:
        train_set: training dataset passed to ``Model.fit``.
        val_set: dataset evaluated by the callbacks after every epoch
            (``eval_freq=1``).
        num_epochs (int): number of training epochs.

    Returns:
        Model: the fitted model.
    """
    relu = Rectlin()
    # keyword arguments shared by all three convolutions
    conv_kwargs = dict(strides=1,
                       padding=1,
                       init=Xavier(local=True),
                       bias=Constant(0),
                       activation=relu)

    net = [
        Conv((3, 3, 128), **conv_kwargs),  # 3x3 kernels, 128 feature maps
        Pooling(2),
        Conv((3, 3, 128), **conv_kwargs),
        Pooling(2),  # 2x2 pooling window
        Conv((3, 3, 128), **conv_kwargs),
        Dropout(keep=0.5),
        Affine(nout=128, init=GlorotUniform(), bias=Constant(0),
               activation=relu),
        Dropout(keep=0.5),
        Affine(nout=2, init=GlorotUniform(), bias=Constant(0),
               activation=Softmax(), name="class_layer"),
    ]

    # model, cost and optimizer
    cnn = Model(layers=net)
    cost = GeneralizedCost(costfunc=CrossEntropyMulti())
    optimizer = Adam()

    callbacks = Callbacks(cnn, eval_set=val_set, eval_freq=1)

    cnn.fit(train_set,
            optimizer=optimizer,
            num_epochs=num_epochs,
            cost=cost,
            callbacks=callbacks)
    return cnn
def constuct_network():
    """
    Construct the AlexNet-style model: five convolutional layers with
    pooling, two 4096-unit Affine + DropoutBinary stages and a 101-way
    Softmax output.

    Returns:
        Model: the assembled (untrained) model.
    """
    def conv(fshape, scale, bias, **extra):
        # Rectlin convolution with Gaussian weights and a constant bias
        return Conv(fshape, init=Gaussian(scale=scale), bias=Constant(bias),
                    activation=Rectlin(), **extra)

    def pool():
        return Pooling(3, strides=2)

    features = [
        conv((11, 11, 64), 0.01, 0, padding=3, strides=4),
        pool(),
        conv((5, 5, 192), 0.01, 1, padding=2),
        pool(),
        conv((3, 3, 384), 0.03, 0, padding=1),
        conv((3, 3, 256), 0.03, 1, padding=1),
        conv((3, 3, 256), 0.03, 1, padding=1),
        pool(),
    ]

    classifier = []
    for _ in range(2):
        classifier.append(Affine(nout=4096, init=Gaussian(scale=0.01),
                                 bias=Constant(1), activation=Rectlin()))
        classifier.append(DropoutBinary(keep=0.5))
    classifier.append(Affine(nout=101, init=Gaussian(scale=0.01),
                             bias=Constant(-7), activation=Softmax()))

    return Model(layers=features + classifier)
def constructCNN(self):
    """
    Return the layer list selected by ``self.network_type``.

    "idsia" builds three Conv/Pooling pairs, a 200-unit Affine layer and a
    Softmax layer of ``self.class_num`` units. "resnet-56", "resnet-32"
    and "resnet-20" delegate to ``resnet`` with depth 9, 5 and 3
    (6 * depth + 2 layers). Any other value yields an empty list.
    """
    if self.network_type == "idsia":
        # (filter shape, conv layer name, pooling layer name)
        conv_specs = (
            ((3, 3, 100), "Conv1", "neon_pool1"),
            ((4, 4, 150), "Conv2", "neon_pool2"),
            ((3, 3, 250), "Conv3", "neon_pool3"),
        )
        net = []
        for fshape, conv_name, pool_name in conv_specs:
            net.append(Conv(fshape,
                            strides=1,
                            init=Kaiming(),
                            bias=Constant(0.0),
                            activation=Rectlin(),
                            name=conv_name))
            net.append(Pooling(2, op="max", strides=2, name=pool_name))
        net.append(Affine(nout=200,
                          init=Kaiming(local=False),
                          bias=Constant(0.0),
                          activation=Rectlin(),
                          name="neon_fc1"))
        net.append(Affine(nout=self.class_num,
                          init=Kaiming(local=False),
                          bias=Constant(0.0),
                          activation=Softmax(),
                          name="neon_fc2"))
        return net

    resnet_depths = (("resnet-56", 9), ("resnet-32", 5), ("resnet-20", 3))
    for type_name, depth in resnet_depths:
        if self.network_type == type_name:
            return resnet(depth, self.class_num,
                          int(self.resize_size[0] / 4))

    # unknown network type
    return []
def create_network():
    """
    Build the AlexNet-style model: five convolutional layers with pooling,
    two 4096-unit Affine + Dropout stages and a 1000-way Softmax output.

    Returns:
        tuple: ``(Model, GeneralizedCost)`` using ``CrossEntropyMulti``.
    """
    # (filter shape, Gaussian scale, bias, extra Conv kwargs, pool after?)
    conv_specs = (
        ((11, 11, 64), 0.01, 0, dict(padding=3, strides=4), True),
        ((5, 5, 192), 0.01, 1, dict(padding=2), True),
        ((3, 3, 384), 0.03, 0, dict(padding=1), False),
        ((3, 3, 256), 0.03, 1, dict(padding=1), False),
        ((3, 3, 256), 0.03, 1, dict(padding=1), True),
    )

    net = []
    for fshape, scale, bias, extra, pooled in conv_specs:
        net.append(Conv(fshape, init=Gaussian(scale=scale),
                        bias=Constant(bias), activation=Rectlin(), **extra))
        if pooled:
            net.append(Pooling(3, strides=2))

    for _ in range(2):
        net.append(Affine(nout=4096, init=Gaussian(scale=0.01),
                          bias=Constant(1), activation=Rectlin()))
        net.append(Dropout(keep=0.5))

    net.append(Affine(nout=1000, init=Gaussian(scale=0.01),
                      bias=Constant(-7), activation=Softmax()))

    model = Model(layers=net)
    cost = GeneralizedCost(costfunc=CrossEntropyMulti())
    return model, cost
def main_branch(self, nout=100):
    """
    Return the layer list of the main path: two Conv/Pooling pairs, four
    inception modules, global average pooling, Dropout and a 1x1 Softmax
    convolution.

    Args:
        nout (int): number of feature maps of the final Softmax Conv.
    """
    path = []

    # stem
    path.append(Conv(**self.conv_params(3, 32, strides=2, padding=0)))
    path.append(Pooling(**self.pool3s2p0))
    path.append(Conv(**self.conv_params(1, 80, strides=1, padding=0)))
    path.append(Pooling(**self.pool3s2p0))

    # inception modules
    path.append(self.inception([(64, ), (48, 64), (64, 96), (32, )],
                               b2fsz=5))
    path.append(self.inception([(64, ), (48, 64), (64, 96), (64, )],
                               b2fsz=5))
    for _ in range(2):
        path.append(self.inception_inception(
            [(64, ), (64, 64), (64, 64, 64), (192, )]))

    # head
    path.append(Pooling(fshape='all', strides=1, op="avg"))
    path.append(Dropout(keep=0.8))
    path.append(Conv(**self.conv_params(1, nout, activation=Softmax())))
    return path
def __init__(self):
    """
    Build and initialize a three-path branching model for a (1, 32, 32)
    input: a main Softmax path plus two Logistic side paths attached at
    branch nodes "b1" and "b2".

    Sets ``in_shape``, ``cost``, ``layers`` and ``model`` on the instance.
    """
    self.in_shape = (1, 32, 32)

    gauss = Gaussian(loc=0.0, scale=0.01)
    # shared keyword-argument bundles (initializer + activation)
    relu_kw = dict(init=gauss, activation=Rectlin())
    sigm_kw = dict(init=gauss, activation=Logistic(shortcut=True))
    soft_kw = dict(init=gauss, activation=Softmax())

    b1 = BranchNode(name="b1")
    b2 = BranchNode(name="b2")

    # main path
    # NOTE(review): the name "main2" is used by three layers below --
    # confirm the duplication is intentional.
    main_path = []
    main_path.append(Affine(nout=100, name="main1", **relu_kw))
    main_path.append(b1)
    main_path.append(Affine(nout=32, name="main2", **relu_kw))
    main_path.append(Affine(nout=160, name="main3", **relu_kw))
    main_path.append(b2)
    main_path.append(Affine(nout=32, name="main2", **relu_kw))
    # make next layer big to check sizing
    main_path.append(Affine(nout=320, name="main2", **relu_kw))
    main_path.append(Affine(nout=10, name="main4", **soft_kw))

    # side path hanging off b1
    side_path_1 = [
        b1,
        Affine(nout=16, name="branch1_1", **relu_kw),
        Affine(nout=10, name="branch1_2", **sigm_kw),
    ]

    # side path hanging off b2
    side_path_2 = [
        b2,
        Affine(nout=16, name="branch2_1", **relu_kw),
        Affine(nout=10, name="branch2_2", **sigm_kw),
    ]

    path_costs = [
        GeneralizedCost(costfunc=CrossEntropyMulti()),
        GeneralizedCost(costfunc=CrossEntropyBinary()),
        GeneralizedCost(costfunc=CrossEntropyBinary()),
    ]
    self.cost = Multicost(costs=path_costs, weights=[1, 0., 0.])

    self.layers = SingleOutputTree([main_path, side_path_1, side_path_2],
                                   alphas=[1, .2, .2])
    self.model = Model(layers=self.layers)
    self.model.initialize(self.in_shape, cost=self.cost)
def create_network():
    """
    Build the all-convolutional network: input normalization, twelve
    Rectlin convolutions (with one Dropout after the ninth), average
    pooling and a Softmax activation.

    Returns:
        tuple: ``(Model, GeneralizedCost)`` using ``CrossEntropyMulti``.
    """
    def conv(fshape, strides, **extra):
        # Kaiming-initialized Rectlin convolution
        return Conv(fshape, init=Kaiming(), activation=Rectlin(),
                    strides=strides, **extra)

    net = [DataTransform(transform=Normalizer(divisor=128.))]

    # block 1: 96 feature maps
    net.append(conv((11, 11, 96), 4, padding=1))
    net.append(conv((1, 1, 96), 1))
    net.append(conv((3, 3, 96), 2, padding=1))  # 54->2,

    # block 2: 256 feature maps
    net.append(conv((5, 5, 256), 1))  # 27->2,
    net.append(conv((1, 1, 256), 1))
    net.append(conv((3, 3, 256), 2, padding=1))  # 23->1,

    # block 3: 384 feature maps
    net.append(conv((3, 3, 384), 1, padding=1))
    net.append(conv((1, 1, 384), 1))
    net.append(conv((3, 3, 384), 2, padding=1))  # 12->,

    net.append(Dropout(keep=0.5))

    # block 4: 1024 maps, then 1000 output maps
    net.append(conv((3, 3, 1024), 1, padding=1))
    net.append(conv((1, 1, 1024), 1))
    net.append(conv((1, 1, 1000), 1))

    net.append(Pooling(6, op='avg'))
    net.append(Activation(Softmax()))

    model = Model(layers=net)
    cost = GeneralizedCost(costfunc=CrossEntropyMulti())
    return model, cost
def run(args, train, test):
    """
    Train a small batch-normalized conv net and report its test error.

    Args:
        args: options object; ``rounding``, ``callback_args`` and
            ``epochs`` are read from it.
        train: training dataset passed to ``Model.fit``.
        test: dataset used for the callbacks and the final evaluation.

    Returns:
        The misclassification error on ``test`` multiplied by 100
        (also printed).
    """
    uniform_init = Uniform(low=-0.1, high=0.1)
    optimizer = GradientDescentMomentum(learning_rate=0.01,
                                        momentum_coef=0.9,
                                        stochastic_round=args.rounding)

    net = []
    for n_maps in (16, 32):
        net.append(Conv((5, 5, n_maps), init=uniform_init,
                        activation=Rectlin(), batch_norm=True))
        net.append(Pooling((2, 2)))
    net.append(Affine(nout=500, init=uniform_init, activation=Rectlin(),
                      batch_norm=True))
    net.append(Affine(nout=10, init=uniform_init, activation=Softmax()))

    loss = GeneralizedCost(costfunc=CrossEntropyMulti())
    mlp = Model(layers=net)
    callbacks = Callbacks(mlp, eval_set=test, **args.callback_args)

    mlp.fit(train,
            optimizer=optimizer,
            num_epochs=args.epochs,
            cost=loss,
            callbacks=callbacks)

    # error as a percentage
    err = mlp.eval(test, metric=Misclassification())*100
    print('Misclassification error = %.2f%%' % err)
    return err
def layers(self):
    """
    Return the layer list: six Explin convolutions with pooling, two
    batch-normalized Explin Affine layers and an output layer, all three
    Affine layers having ``self.noutputs`` units.

    The first convolution is batch-normalized when ``self.bn_first_layer``
    is truthy and otherwise uses a zero bias. The output activation is
    Softmax when ``self.use_softmax`` is truthy, otherwise
    ``Logistic(shortcut=True)``.
    """
    use_bn = True

    def bn_conv(fshape, pad):
        # batch-normalized, stride-1 Explin convolution
        return Conv(fshape, init=Kaiming(), activation=Explin(),
                    batch_norm=use_bn, padding=pad, strides=1)

    def max_pool():
        return Pooling(3, strides=2, padding=1)

    # first layer: batch norm or plain bias
    if self.bn_first_layer:
        first = Conv((7, 7, 96), init=Kaiming(), activation=Explin(),
                     batch_norm=use_bn, padding=3, strides=1)
    else:
        first = Conv((7, 7, 96), init=Kaiming(), bias=Constant(0),
                     activation=Explin(), padding=3, strides=1)

    stack = [first, max_pool()]

    stack.append(bn_conv((7, 7, 128), 3))
    stack.append(max_pool())
    stack.append(bn_conv((5, 5, 256), 2))
    stack.append(max_pool())
    for _ in range(3):
        stack.append(bn_conv((3, 3, 384), 1))
    stack.append(Pooling(3, strides=2, padding=1, op='avg'))

    for _ in range(2):
        stack.append(Affine(nout=self.noutputs, init=Kaiming(),
                            activation=Explin(), batch_norm=use_bn))

    # output layer
    n_out = self.noutputs
    out_init = Kaiming()
    out_bias = Constant(0)
    if self.use_softmax:
        out_activation = Softmax()
    else:
        out_activation = Logistic(shortcut=True)
    stack.append(Affine(nout=n_out, init=out_init, bias=out_bias,
                        activation=out_activation))
    return stack
def resnet(depth, num_classes, s):
    """
    Return the layer list of a three-stage residual network.

    Args:
        depth (int): number of residual modules in each of the three
            stages (feature map depths 16, 32 and 64).
        num_classes (int): number of units of the final Softmax layer.
        s: filter shape passed to the average Pooling layer.

    Returns:
        list: stem Conv, residual modules, Pooling and the output layer.
    """
    # feature-map count per residual module: 16 * depth, then 32, then 64
    nfms = [2**(stage + 4) for stage in range(3) for _ in range(depth)]

    # first module keeps stride 1; later ones use 2 at a stage change
    strides = [1]
    for cur, prev in zip(nfms[1:], nfms):
        strides.append(1 if cur == prev else 2)

    net = [Conv(**conv_params(3, 16))]
    net.extend(module_factory(nfm, stride)
               for nfm, stride in zip(nfms, strides))
    net.append(Pooling(s, op='avg'))
    net.append(Affine(nout=num_classes,
                      init=Kaiming(local=False),
                      batch_norm=True,
                      activation=Softmax()))
    return net