def module_factory(nfm, stride=1):
    mainpath = [Conv(**conv_params(3, nfm, stride=stride)),
                Conv(**conv_params(3, nfm, relu=False))]
    sidepath = [SkipNode() if stride == 1 else Conv(**id_params(nfm))]
    module = [MergeSum([mainpath, sidepath]),
              Activation(Rectlin())]
    return module
def module_factory(nfm, stride=1):
    # args is the script-level argparse namespace; 'plain' builds the
    # non-residual baseline instead of wrapping the convs in a ResidualModule
    projection = None if stride == 1 else IdentityInit()
    module = [Conv(**conv_params(3, nfm, stride=stride)),
              Conv(**conv_params(3, nfm, relu=False))]
    module = module if args.network == 'plain' else [ResidualModule(module, projection)]
    module.append(Activation(Rectlin()))
    return module
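# A hedged sketch of composing module_factory into a CIFAR-style residual
# stack; the depth, feature-map progression, and final classifier below are
# illustrative assumptions, not taken from the functions above:
from neon.layers import Conv, Pooling, Affine
from neon.initializers import Kaiming
from neon.transforms import Softmax

def build_layers(depth=3):
    nfms = [16] * depth + [32] * depth + [64] * depth
    strides = [1] * depth + ([2] + [1] * (depth - 1)) * 2
    layers = [Conv(**conv_params(3, 16))]
    for nfm, stride in zip(nfms, strides):
        layers += module_factory(nfm, stride=stride)
    layers += [Pooling('all', op='avg'),
               Affine(10, init=Kaiming(local=False), activation=Softmax())]
    return layers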
def gen_model(num_channels, height, width):
    assert NervanaObject.be is not None, 'need to generate a backend before using this function'

    init_uni = Kaiming()

    # the reference model uses bias layers, but neon does not allow
    # both batch norm and biases in the same layer
    conv_common = dict(padding=1, init=init_uni, activation=Rectlin(), batch_norm=True)

    # set up the layers
    layers = []

    # need to store a ref to the pooling layers to pass
    # to the upsampling layers to get the argmax indices
    # for upsampling, this stack holds the pooling layer refs
    pool_layers = []

    # first loop generates the encoder layers
    nchan = [64, 128, 256, 512, 512]
    for ind in range(len(nchan)):
        nchanu = nchan[ind]
        lrng = 2 if ind <= 1 else 3
        for lind in range(lrng):
            nm = 'conv%d_%d' % (ind+1, lind+1)
            layers.append(Conv((3, 3, nchanu), strides=1, name=nm, **conv_common))
        layers.append(Pooling(2, strides=2, name='conv%d_pool' % ind))
        pool_layers.append(layers[-1])
        if ind >= 2:
            layers.append(Dropout(keep=0.5, name='drop%d' % (ind+1)))

    # this loop generates the decoder layers
    for ind in range(len(nchan)-1, -1, -1):
        nchanu = nchan[ind]
        lrng = 2 if ind <= 1 else 3
        # upsampling layers need a ref to the corresponding pooling layer
        # to access the argmax indices for upsampling
        layers.append(Upsampling(2, pool_layers.pop(), strides=2, padding=0,
                                 name='conv%d_unpool' % ind))
        for lind in range(lrng):
            nm = 'deconv%d_%d' % (ind+1, lind+1)
            if ind < 4 and lind == lrng - 1:
                nchanu = nchan[ind] // 2  # integer division: channel count must be an int
            layers.append(Conv((3, 3, nchanu), strides=1, name=nm, **conv_common))
        if ind == 0:
            break
        if ind >= 2:
            layers.append(Dropout(keep=0.5, name='drop%d' % (ind+1)))

    # last conv layer outputs num_channels channels, 1 for each output class,
    # with a pixelwise softmax over the channels
    act_last = PixelwiseSoftmax(num_channels, height, width, name="PixelwiseSoftmax")
    conv_last = dict(padding=1, init=init_uni, activation=act_last, batch_norm=False)
    layers.append(Conv((3, 3, num_channels), strides=1, name='deconv_out', **conv_last))
    return layers
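# A minimal usage sketch for gen_model; the 12-class, 256x512 CamVid-style
# configuration is an assumption, and the Upsampling layer may require the
# GPU backend:
from neon.backends import gen_backend
from neon.models import Model

be = gen_backend(backend='gpu', batch_size=4)
segnet = Model(layers=gen_model(num_channels=12, height=256, width=512))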
def _createLayers(self, num_actions):
    # create network
    init_xavier_conv = Xavier(local=True)
    init_xavier_affine = Xavier(local=False)
    # init_uniform_conv = Uniform(low=-.01, high=.01)
    # init_uniform_affine = Uniform(low=-.01, high=.01)
    layers = []
    # The first hidden layer convolves 32 filters of 8x8 with stride 4 with the
    # input image and applies a rectifier nonlinearity.
    # layers.append(Conv((8, 8, 32), strides=4, init=init_xavier_conv,
    #                    activation=Rectlin(), batch_norm=self.batch_norm))
    layers.append(Conv((5, 5, 32), strides=2, init=init_xavier_conv,
                       activation=Rectlin(), batch_norm=self.batch_norm))
    # The second hidden layer convolves 64 filters of 4x4 with stride 2,
    # again followed by a rectifier nonlinearity.
    # layers.append(Conv((4, 4, 64), strides=2, init=init_xavier_conv,
    #                    activation=Rectlin(), batch_norm=self.batch_norm))
    layers.append(Conv((5, 5, 32), strides=2, init=init_xavier_conv,
                       activation=Rectlin(), batch_norm=self.batch_norm))
    # This is followed by a third convolutional layer that convolves 64 filters
    # of 3x3 with stride 1 followed by a rectifier.
    # layers.append(Conv((3, 3, 64), strides=1, init=init_xavier_conv,
    #                    activation=Rectlin(), batch_norm=self.batch_norm))
    # The final hidden layer is fully-connected and consists of 512 rectifier units.
    layers.append(Affine(nout=512, init=init_xavier_affine,
                         activation=Rectlin(), batch_norm=self.batch_norm))
    # The output layer is a fully-connected linear layer with a single output
    # for each valid action.
    layers.append(Affine(nout=num_actions, init=init_xavier_affine))
    return layers
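# A hypothetical sketch of how _createLayers is wired into the Q-network
# itself; the method name, self.input_shape, and the squared-error cost are
# illustrative assumptions:
from neon.models import Model
from neon.layers import GeneralizedCost
from neon.transforms import SumSquared

def _createModel(self, num_actions):
    self.model = Model(layers=self._createLayers(num_actions))
    self.cost = GeneralizedCost(costfunc=SumSquared())
    self.model.initialize(self.input_shape, cost=self.cost)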
def create_network():
    init = GlorotUniform()
    layers = [
        Conv((3, 3, 128), init=init, activation=Rectlin(),
             strides=dict(str_h=1, str_w=2)),
        Conv((3, 3, 256), init=init, batch_norm=True, activation=Rectlin()),
        Pooling(2, strides=2),
        Conv((2, 2, 512), init=init, batch_norm=True, activation=Rectlin()),
        DeepBiRNN(256, init=init, activation=Rectlin(), reset_cells=True, depth=3),
        RecurrentLast(),
        Affine(32, init=init, batch_norm=True, activation=Rectlin()),
        Affine(nout=2, init=init, activation=Softmax())
    ]
    return Model(layers=layers), GeneralizedCost(costfunc=CrossEntropyBinary())
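# A minimal training sketch for the model/cost pair returned above; the
# optimizer settings are assumptions, and train_set/valid_set stand in for
# neon data iterators:
from neon.optimizers import Adagrad
from neon.callbacks.callbacks import Callbacks

model, cost = create_network()
optimizer = Adagrad(learning_rate=0.01)
callbacks = Callbacks(model, eval_set=valid_set)
model.fit(train_set, optimizer=optimizer, num_epochs=10, cost=cost,
          callbacks=callbacks)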
def fit_model(train_set, val_set, num_epochs=50):
    relu = Rectlin()
    conv_params = {
        'strides': 1,
        'padding': 1,
        'init': Xavier(local=True),  # Xavier: uniform in +/- sqrt(3 / num_inputs)
        'bias': Constant(0),
        'activation': relu
    }

    layers = []
    layers.append(Conv((3, 3, 128), **conv_params))  # 3x3 kernels, 128 feature maps
    layers.append(Pooling(2))
    layers.append(Conv((3, 3, 128), **conv_params))
    layers.append(Pooling(2))  # max pooling: highest value from each 2x2 window
    layers.append(Conv((3, 3, 128), **conv_params))
    layers.append(Dropout(keep=0.5))  # with probability 0.5, set activations to 0
    # fully connected layer over the flattened conv features
    layers.append(Affine(nout=128, init=GlorotUniform(), bias=Constant(0),
                         activation=relu))
    layers.append(Dropout(keep=0.5))
    layers.append(Affine(nout=2, init=GlorotUniform(), bias=Constant(0),
                         activation=Softmax(), name="class_layer"))

    # initialize model object
    cnn = Model(layers=layers)
    cost = GeneralizedCost(costfunc=CrossEntropyMulti())
    optimizer = Adam()

    # callbacks = Callbacks(cnn)
    # out_fname = 'yarin_fdl_out_data.h5'
    callbacks = Callbacks(cnn, eval_set=val_set, eval_freq=1)  # , output_file=out_fname

    cnn.fit(train_set, optimizer=optimizer, num_epochs=num_epochs,
            cost=cost, callbacks=callbacks)
    return cnn
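# Example call, assuming X_train/y_train and X_val/y_val are numpy arrays;
# the 3x64x64 input shape is illustrative:
from neon.data import ArrayIterator

train_set = ArrayIterator(X_train, y_train, nclass=2, lshape=(3, 64, 64))
val_set = ArrayIterator(X_val, y_val, nclass=2, lshape=(3, 64, 64))
cnn = fit_model(train_set, val_set, num_epochs=25)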
def test_model_N_S_setter(backend_default):
    # weight initialization
    init = Constant(0.08)

    # model initialization
    layers = [
        Recurrent(150, init, activation=Logistic()),
        Affine(100, init, bias=init, activation=Rectlin())
    ]

    model = Model(layers=layers)
    model.set_batch_size(20)
    model.set_seq_len(10)
def build(self):
    """
    Build the model's layers
    """
    first_layer_dens = 64
    second_layer_dens = 64
    output_layer_dens = 2

    # setup weight initialization function
    init_norm = Gaussian(scale=0.01)

    # setup model layers
    layers = [
        Affine(nout=first_layer_dens, init=init_norm, activation=Rectlin()),
        Affine(nout=second_layer_dens, init=init_norm, activation=Rectlin()),
        Affine(nout=output_layer_dens, init=init_norm,
               activation=Logistic(shortcut=True))
    ]

    # initialize model object
    self.model = Model(layers=layers)
def create_network():
    # weight initialization
    g1 = Gaussian(scale=0.01)
    g5 = Gaussian(scale=0.005)
    c0 = Constant(0)
    c1 = Constant(1)

    # model initialization
    padding = {'pad_d': 1, 'pad_h': 1, 'pad_w': 1}
    strides = {'str_d': 2, 'str_h': 2, 'str_w': 2}
    layers = [
        Conv((3, 3, 3, 64), padding=padding, init=g1, bias=c0, activation=Rectlin()),
        Pooling((1, 2, 2), strides={'str_d': 1, 'str_h': 2, 'str_w': 2}),
        Conv((3, 3, 3, 128), padding=padding, init=g1, bias=c1, activation=Rectlin()),
        Pooling((2, 2, 2), strides=strides),
        Conv((3, 3, 3, 256), padding=padding, init=g1, bias=c1, activation=Rectlin()),
        Pooling((2, 2, 2), strides=strides),
        Conv((3, 3, 3, 256), padding=padding, init=g1, bias=c1, activation=Rectlin()),
        Pooling((2, 2, 2), strides=strides),
        Conv((3, 3, 3, 256), padding=padding, init=g1, bias=c1, activation=Rectlin()),
        Pooling((2, 2, 2), strides=strides),
        Affine(nout=2048, init=g5, bias=c1, activation=Rectlin()),
        Dropout(keep=0.5),
        Affine(nout=2048, init=g5, bias=c1, activation=Rectlin()),
        Dropout(keep=0.5),
        Affine(nout=101, init=g1, bias=c0, activation=Softmax())
    ]
    return Model(layers=layers)
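# The 4-element fshapes above make these spatiotemporal (3D) convolutions over
# (depth, height, width) video volumes. A sketch of attaching a cost and
# optimizer to the model (hyperparameter values are assumptions):
from neon.layers import GeneralizedCost
from neon.transforms import CrossEntropyMulti
from neon.optimizers import GradientDescentMomentum

model = create_network()
cost = GeneralizedCost(costfunc=CrossEntropyMulti())
opt = GradientDescentMomentum(0.01, momentum_coef=0.9, wdecay=0.0005)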
def layers(self):
    init_uni = Uniform(low=-0.1, high=0.1)
    bn = False
    return [
        Conv((5, 5, 16), init=init_uni, activation=Rectlin(), batch_norm=bn),
        Pooling((2, 2)),
        Conv((5, 5, 32), init=init_uni, activation=Rectlin(), batch_norm=bn),
        Pooling((2, 2)),
        Affine(nout=500, init=init_uni, activation=Rectlin(), batch_norm=bn),
        Affine(nout=self.noutputs, init=init_uni, bias=Constant(0),
               activation=Softmax() if self.use_softmax else Logistic(shortcut=True))
    ]
def construct_network():
    """
    Constructs the layers of the AlexNet architecture.
    """
    layers = [
        Conv((11, 11, 64), init=Gaussian(scale=0.01), bias=Constant(0),
             activation=Rectlin(), padding=3, strides=4),
        Pooling(3, strides=2),
        Conv((5, 5, 192), init=Gaussian(scale=0.01), bias=Constant(1),
             activation=Rectlin(), padding=2),
        Pooling(3, strides=2),
        Conv((3, 3, 384), init=Gaussian(scale=0.03), bias=Constant(0),
             activation=Rectlin(), padding=1),
        Conv((3, 3, 256), init=Gaussian(scale=0.03), bias=Constant(1),
             activation=Rectlin(), padding=1),
        Conv((3, 3, 256), init=Gaussian(scale=0.03), bias=Constant(1),
             activation=Rectlin(), padding=1),
        Pooling(3, strides=2),
        Affine(nout=4096, init=Gaussian(scale=0.01), bias=Constant(1),
               activation=Rectlin()),
        DropoutBinary(keep=0.5),
        Affine(nout=4096, init=Gaussian(scale=0.01), bias=Constant(1),
               activation=Rectlin()),
        DropoutBinary(keep=0.5),
        Affine(nout=101, init=Gaussian(scale=0.01), bias=Constant(-7),
               activation=Softmax())
    ]
    return Model(layers=layers)
def module_factory(nfm, bottleneck=True, stride=1):
    nfm_out = nfm * 4 if bottleneck else nfm
    # a negative stride is a sentinel: project the shortcut (no skip node) but
    # convolve with stride abs(stride)
    use_skip = (stride == 1)
    stride = abs(stride)
    sidepath = [SkipNode() if use_skip
                else Conv(**conv_params(1, nfm_out, stride, False))]

    if bottleneck:
        mainpath = [Conv(**conv_params(1, nfm, stride)),
                    Conv(**conv_params(3, nfm)),
                    Conv(**conv_params(1, nfm_out, relu=False))]
    else:
        mainpath = [Conv(**conv_params(3, nfm, stride)),
                    Conv(**conv_params(3, nfm, relu=False))]

    return [MergeSum([mainpath, sidepath]),
            Activation(Rectlin())]
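# Illustrative sketch of stacking these modules into an ImageNet-scale
# residual network; the per-stage repeat counts (ResNet-50-style) are
# assumptions:
def build_stages(reps=(3, 4, 6, 3), nfms=(64, 128, 256, 512)):
    layers = []
    for stage, (rep, nfm) in enumerate(zip(reps, nfms)):
        for i in range(rep):
            # the first module of a stage needs a projection shortcut: stride 2
            # for downsampling stages, and the negative-stride sentinel (-1)
            # for a stride-1 projection in the first stage
            if i == 0:
                stride = 2 if stage > 0 else -1
            else:
                stride = 1
            layers += module_factory(nfm, bottleneck=True, stride=stride)
    return layers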
def __init__(self):
    self.in_shape = (1, 32, 32)

    init_norm = Gaussian(loc=0.0, scale=0.01)
    normrelu = dict(init=init_norm, activation=Rectlin())
    normsigm = dict(init=init_norm, activation=Logistic(shortcut=True))
    normsoft = dict(init=init_norm, activation=Softmax())

    # setup model layers
    b1 = BranchNode(name="b1")
    b2 = BranchNode(name="b2")
    p1 = [Affine(nout=100, name="main1", **normrelu),
          b1,
          Affine(nout=32, name="main2", **normrelu),
          Affine(nout=160, name="main3", **normrelu),
          b2,
          Affine(nout=32, name="main2", **normrelu),
          # make next layer big to check sizing
          Affine(nout=320, name="main2", **normrelu),
          Affine(nout=10, name="main4", **normsoft)]

    p2 = [b1,
          Affine(nout=16, name="branch1_1", **normrelu),
          Affine(nout=10, name="branch1_2", **normsigm)]

    p3 = [b2,
          Affine(nout=16, name="branch2_1", **normrelu),
          Affine(nout=10, name="branch2_2", **normsigm)]

    self.cost = Multicost(costs=[GeneralizedCost(costfunc=CrossEntropyMulti()),
                                 GeneralizedCost(costfunc=CrossEntropyBinary()),
                                 GeneralizedCost(costfunc=CrossEntropyBinary())],
                          weights=[1, 0., 0.])

    self.layers = SingleOutputTree([p1, p2, p3], alphas=[1, .2, .2])
    self.model = Model(layers=self.layers)
    self.model.initialize(self.in_shape, cost=self.cost)
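# Note: SingleOutputTree fprops all three branches during training (with the
# side-branch gradients scaled by alphas), but returns only the main path's
# output at inference. A sketch, with the input buffer built from in_shape:
x = self.model.be.iobuf(self.in_shape)
out = self.model.fprop(x, inference=True)  # p1's 10-way softmax output only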
def create_network():
    layers = [
        Conv((11, 11, 64), init=Gaussian(scale=0.01), bias=Constant(0),
             activation=Rectlin(), padding=3, strides=4),
        Pooling(3, strides=2),
        Conv((5, 5, 192), init=Gaussian(scale=0.01), bias=Constant(1),
             activation=Rectlin(), padding=2),
        Pooling(3, strides=2),
        Conv((3, 3, 384), init=Gaussian(scale=0.03), bias=Constant(0),
             activation=Rectlin(), padding=1),
        Conv((3, 3, 256), init=Gaussian(scale=0.03), bias=Constant(1),
             activation=Rectlin(), padding=1),
        Conv((3, 3, 256), init=Gaussian(scale=0.03), bias=Constant(1),
             activation=Rectlin(), padding=1),
        Pooling(3, strides=2),
        Affine(nout=4096, init=Gaussian(scale=0.01), bias=Constant(1),
               activation=Rectlin()),
        Dropout(keep=0.5),
        Affine(nout=4096, init=Gaussian(scale=0.01), bias=Constant(1),
               activation=Rectlin()),
        Dropout(keep=0.5),
        Affine(nout=1000, init=Gaussian(scale=0.01), bias=Constant(-7),
               activation=Softmax()),
    ]
    return Model(layers=layers), GeneralizedCost(costfunc=CrossEntropyMulti())
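# A minimal sketch of training the AlexNet model above; the momentum/schedule
# values are assumptions, loosely following common ImageNet practice:
from neon.optimizers import GradientDescentMomentum, Schedule
from neon.callbacks.callbacks import Callbacks

model, cost = create_network()
opt = GradientDescentMomentum(0.01, momentum_coef=0.9, wdecay=0.0005,
                              schedule=Schedule(step_config=[22, 44, 65],
                                                change=0.15))
callbacks = Callbacks(model, eval_set=valid_set)
model.fit(train_set, optimizer=opt, num_epochs=90, cost=cost,
          callbacks=callbacks)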
def conv_params(fsize, nfm, padding='SAME', strides=1, activation=Rectlin(),
                batch_norm=True):
    fsize = fsize if isinstance(fsize, tuple) else (fsize, fsize)
    fshape = fsize + (nfm,)
    padding = {'pad_h': (fsize[0] // 2 if padding == 'SAME' else 0),
               'pad_w': (fsize[1] // 2 if padding == 'SAME' else 0),
               'pad_d': 0}
    strides = {'str_h': strides, 'str_w': strides, 'str_d': 1}
    return dict(fshape=fshape,
                strides=strides,
                activation=activation,
                padding=padding,
                batch_norm=batch_norm,
                init=Kaiming(local=True))
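# Example: the helper expands a compact spec into the full Conv keyword
# arguments, so layer definitions stay one-liners:
layers = [Conv(**conv_params(7, 64, strides=2)),
          Conv(**conv_params((3, 3), 128))]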
def __init__(self):
    self.in_shape = (3, 32, 32)

    relu = Rectlin()
    init_use = Constant(0)
    conv = dict(init=init_use, batch_norm=False, activation=relu)
    convp1 = dict(init=init_use, batch_norm=False, bias=init_use,
                  activation=relu, padding=1)
    convp1s2 = dict(init=init_use, batch_norm=False, bias=init_use,
                    padding=1, strides=2)

    layers = [
        Dropout(keep=.8),
        Conv((3, 3, 96), **convp1),
        Conv((3, 3, 96), **convp1),
        Conv((3, 3, 96), **convp1s2),
        Dropout(keep=.5),
        Conv((3, 3, 192), **convp1),
        Conv((3, 3, 192), **convp1),
        Conv((3, 3, 192), **convp1s2),
        Dropout(keep=.5),
        Conv((3, 3, 192), **convp1),
        Conv((1, 1, 192), **conv),
        Conv((1, 1, 16), **conv),
        Pooling(8, op="avg"),
        Activation(Softmax())
    ]
    self.layers = layers
    model = Model(layers=layers)
    cost = GeneralizedCost(costfunc=CrossEntropyMulti())
    model.initialize(self.in_shape, cost=cost)
    self.model = model
def test_model_get_outputs(backend_default, data):
    dataset = MNIST(path=data)
    train_set = dataset.train_iter

    init_norm = Gaussian(loc=0.0, scale=0.1)

    layers = [Affine(nout=20, init=init_norm, bias=init_norm, activation=Rectlin()),
              Affine(nout=10, init=init_norm, activation=Logistic(shortcut=True))]
    mlp = Model(layers=layers)
    out_list = []
    mlp.initialize(train_set)
    for x, t in train_set:
        x = mlp.fprop(x)
        out_list.append(x.get().T.copy())
    ref_output = np.vstack(out_list)

    train_set.reset()
    output = mlp.get_outputs(train_set)
    assert np.allclose(output, ref_output[:output.shape[0], :])

    # test model benchmark inference
    mlp.benchmark(train_set, inference=True, niterations=5)