def _createLayers(self, num_actions):
    # create network
    init_xavier_conv = Xavier(local=True)
    init_xavier_affine = Xavier(local=False)
    # init_uniform_conv = Uniform(low=-.01, high=.01)
    # init_uniform_affine = Uniform(low=-.01, high=.01)
    layers = []
    # The first hidden layer convolves 32 filters of 8x8 with stride 4 with the input image and applies a rectifier nonlinearity.
    # layers.append(Conv((8, 8, 32), strides=4, init=init_xavier_conv, activation=Rectlin(), batch_norm=self.batch_norm))
    layers.append(Conv((5, 5, 32), strides=2, init=init_xavier_conv,
                       activation=Rectlin(), batch_norm=self.batch_norm))
    # The second hidden layer convolves 64 filters of 4x4 with stride 2, again followed by a rectifier nonlinearity.
    # layers.append(Conv((4, 4, 64), strides=2, init=init_xavier_conv, activation=Rectlin(), batch_norm=self.batch_norm))
    layers.append(Conv((5, 5, 32), strides=2, init=init_xavier_conv,
                       activation=Rectlin(), batch_norm=self.batch_norm))
    # This is followed by a third convolutional layer that convolves 64 filters of 3x3 with stride 1 followed by a rectifier.
    # layers.append(Conv((3, 3, 64), strides=1, init=init_xavier_conv, activation=Rectlin(), batch_norm=self.batch_norm))
    # The final hidden layer is fully-connected and consists of 512 rectifier units.
    layers.append(Affine(nout=512, init=init_xavier_affine,
                         activation=Rectlin(), batch_norm=self.batch_norm))
    # The output layer is a fully-connected linear layer with a single output for each valid action.
    layers.append(Affine(nout=num_actions, init=init_xavier_affine))
    return layers
def create_network():
    # weight initialization
    g1 = Gaussian(scale=0.01)
    g5 = Gaussian(scale=0.005)
    c0 = Constant(0)
    c1 = Constant(1)

    # model initialization
    padding = {'pad_d': 1, 'pad_h': 1, 'pad_w': 1}
    strides = {'str_d': 2, 'str_h': 2, 'str_w': 2}
    layers = [
        Conv((3, 3, 3, 64), padding=padding, init=g1, bias=c0, activation=Rectlin()),
        Pooling((1, 2, 2), strides={'str_d': 1, 'str_h': 2, 'str_w': 2}),
        Conv((3, 3, 3, 128), padding=padding, init=g1, bias=c1, activation=Rectlin()),
        Pooling((2, 2, 2), strides=strides),
        Conv((3, 3, 3, 256), padding=padding, init=g1, bias=c1, activation=Rectlin()),
        Pooling((2, 2, 2), strides=strides),
        Conv((3, 3, 3, 256), padding=padding, init=g1, bias=c1, activation=Rectlin()),
        Pooling((2, 2, 2), strides=strides),
        Conv((3, 3, 3, 256), padding=padding, init=g1, bias=c1, activation=Rectlin()),
        Pooling((2, 2, 2), strides=strides),
        Affine(nout=2048, init=g5, bias=c1, activation=Rectlin()),
        Dropout(keep=0.5),
        Affine(nout=2048, init=g5, bias=c1, activation=Rectlin()),
        Dropout(keep=0.5),
        Affine(nout=101, init=g1, bias=c0, activation=Softmax())
    ]
    return Model(layers=layers)
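# Hedged usage sketch (assumption, not from the source): create_network() above only
# builds the 3D-conv Model. A typical neon training loop around it might look like the
# following; the data iterators, learning-rate schedule, weight decay, and epoch count
# are illustrative placeholders, and the usual neon imports are assumed.
def train_c3d(train_set, valid_set, num_epochs=18):
    model = create_network()
    cost = GeneralizedCost(costfunc=CrossEntropyMulti())
    opt = GradientDescentMomentum(0.01, momentum_coef=0.9, wdecay=0.005,
                                  schedule=Schedule([4, 8, 12, 16], 0.1))
    callbacks = Callbacks(model, eval_set=valid_set)
    model.fit(train_set, optimizer=opt, num_epochs=num_epochs,
              cost=cost, callbacks=callbacks)
    return model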
def _createLayers(self, num_actions):
    init_xavier_conv = Xavier(local=True)
    init_xavier_affine = Xavier(local=False)
    layers = []
    layers.append(Conv((8, 8, 32), strides=4, init=init_xavier_conv,
                       activation=Rectlin(), batch_norm=self.batch_norm))
    layers.append(Conv((4, 4, 64), strides=2, init=init_xavier_conv,
                       activation=Rectlin(), batch_norm=self.batch_norm))
    layers.append(Conv((2, 2, 128), strides=1, init=init_xavier_conv,
                       activation=Rectlin(), batch_norm=self.batch_norm))
    layers.append(Affine(nout=256, init=init_xavier_affine,
                         activation=Rectlin(), batch_norm=self.batch_norm))
    layers.append(Affine(nout=num_actions, init=init_xavier_affine))
    return layers
def build(self):
    # setup model layers
    layers = [Affine(nout=100, init=self.init, bias=self.init, activation=Rectlin()),
              Affine(nout=2, init=self.init, bias=self.init, activation=Softmax())]

    # initialize model object
    self.model = Model(layers=layers)
def _createLayers(self, num_actions):
    # create network
    init_norm = Gaussian(loc=0.0, scale=0.01)
    layers = []
    if self.screen_dim == (84, 84):
        # The first hidden layer convolves 32 filters of 8x8 with stride 4 with the input image and applies a rectifier nonlinearity.
        logger.info("Using 8x8 filter for first Conv")
        layers.append(Conv((8, 8, 32), strides=4, init=init_norm, activation=Rectlin()))
    elif self.screen_dim == (52, 40):
        # The first hidden layer convolves 32 filters of 5x4 with stride 2 with the input image and applies a rectifier nonlinearity.
        logger.info("Using 5x4 filter for first Conv")
        layers.append(Conv((5, 4, 32), strides=2, init=init_norm, activation=Rectlin()))
    else:
        raise NotImplementedError("Unsupported screen dim.")
    # The second hidden layer convolves 64 filters of 4x4 with stride 2, again followed by a rectifier nonlinearity.
    layers.append(Conv((4, 4, 64), strides=2, init=init_norm, activation=Rectlin()))
    # This is followed by a third convolutional layer that convolves 64 filters of 3x3 with stride 1 followed by a rectifier.
    layers.append(Conv((3, 3, 64), strides=1, init=init_norm, activation=Rectlin()))
    # The final hidden layer is fully-connected and consists of 512 rectifier units.
    layers.append(Affine(nout=512, init=init_norm, activation=Rectlin()))
    # The output layer is a fully-connected linear layer with a single output for each valid action.
    layers.append(Affine(nout=num_actions, init=init_norm))
    return layers
def layers(self):
    init_uni = Uniform(low=-0.1, high=0.1)
    bn = False
    return [
        DOG((5.0, 4.0, 3.0, 1.6), 1.8),
        Conv((5, 5, 16), init=init_uni, activation=Rectlin(), batch_norm=bn),
        Pooling((2, 2)),
        Conv((5, 5, 32), init=init_uni, activation=Rectlin(), batch_norm=bn),
        Pooling((2, 2)),
        Affine(nout=500, init=init_uni, activation=Rectlin(), batch_norm=bn),
        Affine(nout=self.noutputs, init=init_uni, bias=Constant(0),
               activation=Softmax() if self.use_softmax else Logistic(shortcut=True))
    ]
def main(args):
    # load up the mnist data set
    dataset = MNIST(path=args.data_dir)

    # initialize model object
    mlp = Model(layers=[
        Affine(nout=100, init=Gaussian(loc=0.0, scale=0.01), activation=Rectlin()),
        Affine(nout=10, init=Gaussian(loc=0.0, scale=0.01), activation=Logistic(shortcut=True))
    ])

    # setup optimizer
    optimizer = GradientDescentMomentum(0.1, momentum_coef=0.9, stochastic_round=args.rounding)

    # configure callbacks
    callbacks = Callbacks(mlp, eval_set=dataset.valid_iter, **args.callback_args)

    # run fit, with cost function set up as CrossEntropy
    mlp.fit(dataset.train_iter, optimizer=optimizer, num_epochs=args.epochs,
            cost=GeneralizedCost(costfunc=CrossEntropyBinary()), callbacks=callbacks)

    error_rate = mlp.eval(dataset.valid_iter, metric=Misclassification())
    neon_logger.display('Classification accuracy = %.4f' % (1 - error_rate))
def build_model(self):
    # setup weight initialization function
    init_norm = Gaussian(loc=0.0, scale=0.01)

    # setup model layers
    layers = [
        Affine(nout=100, init=init_norm, bias=Uniform(), activation=Rectlin()),
        Affine(nout=10, init=init_norm, bias=Uniform(), activation=Logistic(shortcut=True))
    ]

    # setup cost function as CrossEntropy
    self.cost = GeneralizedCost(costfunc=CrossEntropyBinary())

    # setup optimizer
    self.optimizer = GradientDescentMomentum(
        0.1, momentum_coef=0.9, stochastic_round=self.args.rounding)

    # initialize model object
    self.model = ModelDist(layers=layers)
def create_frcn_model(frcn_fine_tune=False):
    b1 = BranchNode(name="b1")
    imagenet_layers = add_vgg_layers()
    HW = (7, 7)

    frcn_layers = [
        RoiPooling(layers=imagenet_layers, HW=HW, bprop_enabled=frcn_fine_tune),
        Affine(nout=4096, init=Gaussian(scale=0.005), bias=Constant(.1), activation=Rectlin()),
        Dropout(keep=0.5),
        Affine(nout=4096, init=Gaussian(scale=0.005), bias=Constant(.1), activation=Rectlin()),
        Dropout(keep=0.5),
        b1,
        Affine(nout=21, init=Gaussian(scale=0.01), bias=Constant(0), activation=Softmax())
    ]
    bb_layers = [
        b1,
        Affine(nout=84, init=Gaussian(scale=0.001), bias=Constant(0), activation=Identity())
    ]

    return Model(layers=Tree([frcn_layers, bb_layers]))
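# create_frcn_model() above calls add_vgg_layers(), which is not shown in this section.
# A minimal sketch of what it is assumed to return: the VGG-16 convolutional stack
# (3x3 convs grouped by pooling stages) whose feature maps feed the RoiPooling layer.
# Filter counts follow the standard VGG-16 layout; the initialization choice and whether
# the final pooling layer is kept are assumptions, not confirmed by the source.
def add_vgg_layers():
    conv_params = dict(strides=1, padding=1, init=Xavier(local=True),
                       bias=Constant(0), activation=Rectlin())
    layers = []
    for nofm, reps in [(64, 2), (128, 2), (256, 3), (512, 3), (512, 3)]:
        for _ in range(reps):
            layers.append(Conv((3, 3, nofm), **conv_params))
        layers.append(Pooling(2, strides=2))
    return layers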
def __init__(self):
    self.in_shape = [1024, (2538, 38)]
    init = Constant(0)
    image_path = Sequential([Affine(20, init, bias=init), Affine(10, init, bias=init)])
    sent_path = Sequential([Affine(30, init, bias=init), Affine(10, init)])
    layers = [
        MergeMultistream(layers=[image_path, sent_path], merge="recurrent"),
        Dropout(keep=0.5),
        LSTM(4, init, activation=Logistic(), gate_activation=Tanh(), reset_cells=True),
        Affine(20, init, bias=init, activation=Softmax())
    ]
    self.layers = layers
    self.cost = GeneralizedCostMask(CrossEntropyMulti())
    self.model = Model(layers=layers)
    self.model.initialize(self.in_shape, cost=self.cost)
def _createLayers(self, num_actions):
    # create network
    init_norm = Gaussian(loc=0.0, scale=0.01)
    layers = []
    # The first hidden layer convolves 32 filters of 8x8 with stride 4 with the input image and applies a rectifier nonlinearity.
    layers.append(Conv((8, 8, 32), strides=4, init=init_norm,
                       activation=Rectlin(), batch_norm=self.batch_norm))
    # The second hidden layer convolves 64 filters of 4x4 with stride 2, again followed by a rectifier nonlinearity.
    layers.append(Conv((4, 4, 64), strides=2, init=init_norm,
                       activation=Rectlin(), batch_norm=self.batch_norm))
    # This is followed by a third convolutional layer that convolves 64 filters of 3x3 with stride 1 followed by a rectifier.
    layers.append(Conv((3, 3, 64), strides=1, init=init_norm,
                       activation=Rectlin(), batch_norm=self.batch_norm))
    # The final hidden layer is fully-connected and consists of 512 rectifier units.
    layers.append(Affine(nout=512, init=init_norm,
                         activation=Rectlin(), batch_norm=self.batch_norm))
    # The output layer is a fully-connected linear layer with a single output for each valid action.
    layers.append(Affine(nout=num_actions, init=init_norm))
    return layers
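# Hedged usage sketch (assumption, not from the source): _createLayers() only returns a
# layer list. In a DQN-style agent it is typically wrapped into a Model with a squared-error
# cost and an RMSProp optimizer, roughly as below. The attribute names (self.input_shape,
# self.learning_rate, self.decay_rate) are illustrative, not taken from the original class.
def _createModel(self, num_actions):
    self.model = Model(layers=self._createLayers(num_actions))
    self.cost = GeneralizedCost(costfunc=SumSquared())
    self.optimizer = RMSProp(learning_rate=self.learning_rate,
                             decay_rate=self.decay_rate)
    # initialize with the per-sample input shape (batch dimension dropped)
    self.model.initialize(self.input_shape[:-1], cost=self.cost)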
def run(train, test):
    init = Gaussian(scale=0.01)
    layers = [
        Conv((3, 3, 128), init=init, activation=Rectlin(),
             strides=dict(str_h=1, str_w=2)),
        Conv((3, 3, 256), init=init, batch_norm=True, activation=Rectlin()),
        Pooling(2, strides=2),
        Conv((2, 2, 512), init=init, batch_norm=True, activation=Rectlin()),
        DeepBiRNN(256, init=init, activation=Rectlin(), reset_cells=True, depth=3),
        RecurrentLast(),
        Affine(32, init=init, batch_norm=True, activation=Rectlin()),
        Affine(nout=common['nclasses'], init=init, activation=Softmax())
    ]

    model = Model(layers=layers)
    opt = Adadelta()
    metric = Misclassification()
    callbacks = Callbacks(model, eval_set=test, metric=metric, **args.callback_args)
    cost = GeneralizedCost(costfunc=CrossEntropyBinary())

    model.fit(train, optimizer=opt, num_epochs=args.epochs, cost=cost, callbacks=callbacks)
    return model
def __init__(self, overlapping_classes=None, exclusive_classes=None, analytics_input=True,
             network_type='conv_net', num_words=60, width=100, lookup_size=0, lookup_dim=0,
             optimizer=Adam()):
    assert (overlapping_classes is not None) or (exclusive_classes is not None)
    self.width = width
    self.num_words = num_words
    self.overlapping_classes = overlapping_classes
    self.exclusive_classes = exclusive_classes
    self.analytics_input = analytics_input
    self.recurrent = network_type == 'lstm'
    self.lookup_size = lookup_size
    self.lookup_dim = lookup_dim

    init = GlorotUniform()
    activation = Rectlin(slope=1E-05)
    gate = Logistic()

    input_layers = self.input_layers(analytics_input, init, activation, gate)

    if self.overlapping_classes is None:
        output_layers = [Affine(len(self.exclusive_classes), init, activation=Softmax())]
    elif self.exclusive_classes is None:
        output_layers = [Affine(len(self.overlapping_classes), init, activation=Logistic())]
    else:
        output_branch = BranchNode(name='exclusive_overlapping')
        output_layers = Tree([[SkipNode(),
                               output_branch,
                               Affine(len(self.exclusive_classes), init, activation=Softmax())],
                              [output_branch,
                               Affine(len(self.overlapping_classes), init, activation=Logistic())]])
    layers = [input_layers,
              # this is where inputs meet, and where we may want to add depth or
              # additional functionality
              Dropout(keep=0.8),
              output_layers]
    super(ClassifierNetwork, self).__init__(layers, optimizer=optimizer)
def _createLayers(self):
    # create network
    init_norm = Gaussian(loc=0.0, scale=0.01)
    layers = []
    for i in xrange(self.num_layers):
        layers.append(Affine(nout=self.hidden_nodes, init=init_norm, activation=Rectlin()))
    layers.append(Affine(nout=self.num_actions, init=init_norm))
    return layers
def constuct_network(): """ Constructs the layers of our RCNN architecture. It is similar to AlexNet but simplified to only a few convolutional layers and 3 LSTM layers. """ layers = [ Conv((11, 11, 64), init=Gaussian(scale=0.01), bias=Constant(0), activation=Rectlin(), padding=3, strides=4), Pooling(3, strides=2), Conv((7, 7, 128), init=Gaussian(scale=0.01), bias=Constant(1), activation=Rectlin(), padding=2), Pooling(3, strides=2), Conv((5, 5, 256), init=Gaussian(scale=0.03), bias=Constant(0), activation=Rectlin(), padding=1), Conv((3, 3, 256), init=Gaussian(scale=0.03), bias=Constant(1), activation=Rectlin(), padding=1), Pooling(3, strides=2), Affine(nout=4096, init=Gaussian(scale=0.01), bias=Constant(1), activation=Rectlin()), DropoutBinary(keep=0.5), LSTM(512, init=Gaussian(scale=0.03), activation=Rectlin(), gate_activation=Tanh()), LSTM(512, init=Gaussian(scale=0.03), activation=Rectlin(), gate_activation=Tanh()), LSTM(512, init=Gaussian(scale=0.03), activation=Rectlin(), gate_activation=Tanh()), Affine(nout=4096, init=Gaussian(scale=0.01), bias=Constant(1), activation=Rectlin()), DropoutBinary(keep=0.5), Affine(nout=101, init=Gaussian(scale=0.01), bias=Constant(-7), activation=Softmax()) ] return Model(layers=layers)
def aux_branch(bnode):
    return [
        bnode,
        Pooling(fshape=5, strides=3, op="avg"),
        Conv((1, 1, 128), **common),
        Affine(nout=1024, init=init1, activation=relu, bias=bias),
        Dropout(keep=0.3),
        Affine(nout=1000, init=init1, activation=Softmax(), bias=Constant(0))
    ]
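# The aux_branch()/main_branch() snippets in this section rely on module-level shorthands
# (common, commonp1, pool3s2p1, init1, bias, relu) that are not shown here. A plausible set
# of definitions, in the style of neon's GoogLeNet example, is sketched below; the exact
# values are assumptions rather than the authors' settings.
init1 = Xavier(local=False)   # fully-connected layers
initx = Xavier(local=True)    # convolutional layers
bias = Constant(val=0.2)
relu = Rectlin()
common = dict(activation=relu, init=initx, bias=bias)
commonp1 = dict(activation=relu, init=initx, bias=bias, padding=1)
pool3s2p1 = dict(fshape=3, padding=1, strides=2, op="max")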
def _createLayers(self, num_actions):
    # create network
    init_norm = Gaussian(loc=0.0, scale=0.01)
    layers = []
    # The final hidden layer is fully-connected and consists of 64 rectifier units.
    layers.append(Affine(nout=64, init=init_norm, bias=init_norm, activation=Rectlin()))
    # The output layer is a fully-connected linear layer with a single output for each valid action.
    layers.append(Affine(nout=num_actions, init=init_norm, bias=init_norm))
    return layers
def main():
    parser = get_parser()
    args = parser.parse_args()
    print('Args:', args)

    loggingLevel = logging.DEBUG if args.verbose else logging.INFO
    logging.basicConfig(level=loggingLevel, format='')

    ext = extension_from_parameters(args)

    loader = p1b3.DataLoader(feature_subsample=args.feature_subsample,
                             scaling=args.scaling,
                             drug_features=args.drug_features,
                             scramble=args.scramble,
                             min_logconc=args.min_logconc,
                             max_logconc=args.max_logconc,
                             subsample=args.subsample,
                             category_cutoffs=args.category_cutoffs)

    # initializer = Gaussian(loc=0.0, scale=0.01)
    initializer = GlorotUniform()
    activation = get_function(args.activation)()

    layers = []
    reshape = None

    if args.convolution and args.convolution[0]:
        reshape = (1, loader.input_dim, 1)
        layer_list = list(range(0, len(args.convolution), 3))
        for l, i in enumerate(layer_list):
            nb_filter = args.convolution[i]
            filter_len = args.convolution[i + 1]
            stride = args.convolution[i + 2]
            # print(nb_filter, filter_len, stride)
            # fshape: (height, width, num_filters).
            layers.append(Conv((1, filter_len, nb_filter),
                               strides={'str_h': 1, 'str_w': stride},
                               init=initializer, activation=activation))
            if args.pool:
                layers.append(Pooling((1, args.pool)))

    for layer in args.dense:
        if layer:
            layers.append(Affine(nout=layer, init=initializer, activation=activation))
            if args.drop:
                layers.append(Dropout(keep=(1 - args.drop)))
    layers.append(Affine(nout=1, init=initializer, activation=neon.transforms.Identity()))

    model = Model(layers=layers)

    train_iter = ConcatDataIter(loader, ndata=args.train_samples,
                                lshape=reshape, datatype=args.datatype)
    val_iter = ConcatDataIter(loader, partition='val', ndata=args.val_samples,
                              lshape=reshape, datatype=args.datatype)

    cost = GeneralizedCost(get_function(args.loss)())
    optimizer = get_function(args.optimizer)()
    callbacks = Callbacks(model, eval_set=val_iter, **args.callback_args)

    model.fit(train_iter, optimizer=optimizer, num_epochs=args.epochs,
              cost=cost, callbacks=callbacks)
def aux_branch(bnode, ind):
    # TODO: put dropout back in
    nm = 'loss%d/' % ind
    return [bnode,
            Pooling(fshape=5, strides=3, op="avg", name=nm + 'ave_pool'),
            Conv((1, 1, 128), name=nm + 'conv', **common),
            Affine(nout=1024, init=init1, activation=relu, bias=bias, name=nm + 'fc'),
            Dropout(keep=1.0, name=nm + 'drop_fc'),
            Affine(nout=1000, init=init1, activation=Softmax(),
                   bias=Constant(0), name=nm + 'classifier')]
def layers(self):
    return [
        Conv((7, 7, 96), init=Gaussian(scale=0.0001), bias=Constant(0),
             activation=Rectlin(), padding=3, strides=1),
        LRN(31, ascale=0.001, bpower=0.75),
        Pooling(3, strides=2, padding=1),
        Conv((5, 5, 256), init=Gaussian(scale=0.01), bias=Constant(0),
             activation=Rectlin(), padding=2, strides=1),
        LRN(31, ascale=0.001, bpower=0.75),
        Pooling(3, strides=2, padding=1),
        Conv((3, 3, 384), init=Gaussian(scale=0.01), bias=Constant(0),
             activation=Rectlin(), padding=1, strides=1),
        Conv((3, 3, 384), init=Gaussian(scale=0.01), bias=Constant(0),
             activation=Rectlin(), padding=1, strides=1),
        Conv((3, 3, 256), init=Gaussian(scale=0.01), bias=Constant(0),
             activation=Rectlin(), padding=1, strides=1),
        Pooling(3, strides=2, padding=1),
        Affine(nout=4096, init=Gaussian(scale=0.01), bias=Constant(0), activation=Identity()),
        Dropout(keep=0.5),
        Affine(nout=4096, init=Gaussian(scale=0.01), bias=Constant(0), activation=Identity()),
        Dropout(keep=0.5),
        Affine(nout=self.noutputs, init=Gaussian(scale=0.01), bias=Constant(0),
               activation=Softmax() if self.use_softmax else Logistic(shortcut=True))
    ]
def fit_model(train_set, val_set, num_epochs=50):
    relu = Rectlin()
    conv_params = {
        'strides': 1,
        'padding': 1,
        'init': Xavier(local=True),  # Xavier: uniform in +/- sqrt(3)/num_inputs
        'bias': Constant(0),
        'activation': relu
    }

    layers = []
    layers.append(Conv((3, 3, 128), **conv_params))  # 3x3 kernel, 128 feature maps
    layers.append(Pooling(2))
    layers.append(Conv((3, 3, 128), **conv_params))
    layers.append(Pooling(2))  # highest value from each 2x2 window
    layers.append(Conv((3, 3, 128), **conv_params))
    layers.append(Dropout(keep=0.5))  # with probability 0.5, sets activation values to 0
    layers.append(Affine(nout=128, init=GlorotUniform(), bias=Constant(0),
                         activation=relu))  # fully-connected linear combination of the conv features
    layers.append(Dropout(keep=0.5))
    layers.append(Affine(nout=2, init=GlorotUniform(), bias=Constant(0),
                         activation=Softmax(), name="class_layer"))

    # initialize model object
    cnn = Model(layers=layers)
    cost = GeneralizedCost(costfunc=CrossEntropyMulti())
    optimizer = Adam()

    # callbacks = Callbacks(cnn)
    # out_fname = 'yarin_fdl_out_data.h5'
    callbacks = Callbacks(cnn, eval_set=val_set, eval_freq=1)  # , output_file=out_fname

    cnn.fit(train_set, optimizer=optimizer, num_epochs=num_epochs, cost=cost, callbacks=callbacks)
    return cnn
def constuct_network(): """ Constructs the layers of the AlexNet architecture. """ layers = [ Conv((11, 11, 64), init=Gaussian(scale=0.01), bias=Constant(0), activation=Rectlin(), padding=3, strides=4), Pooling(3, strides=2), Conv((5, 5, 192), init=Gaussian(scale=0.01), bias=Constant(1), activation=Rectlin(), padding=2), Pooling(3, strides=2), Conv((3, 3, 384), init=Gaussian(scale=0.03), bias=Constant(0), activation=Rectlin(), padding=1), Conv((3, 3, 256), init=Gaussian(scale=0.03), bias=Constant(1), activation=Rectlin(), padding=1), Conv((3, 3, 256), init=Gaussian(scale=0.03), bias=Constant(1), activation=Rectlin(), padding=1), Pooling(3, strides=2), Affine(nout=4096, init=Gaussian(scale=0.01), bias=Constant(1), activation=Rectlin()), DropoutBinary(keep=0.5), Affine(nout=4096, init=Gaussian(scale=0.01), bias=Constant(1), activation=Rectlin()), DropoutBinary(keep=0.5), Affine(nout=101, init=Gaussian(scale=0.01), bias=Constant(-7), activation=Softmax()) ] return Model(layers=layers)
def constructCNN(self):
    layers = []
    if self.network_type == "idsia":
        layers.append(Conv((3, 3, 100), strides=1, init=Kaiming(), bias=Constant(0.0),
                           activation=Rectlin(), name="Conv1"))
        layers.append(Pooling(2, op="max", strides=2, name="neon_pool1"))
        layers.append(Conv((4, 4, 150), strides=1, init=Kaiming(), bias=Constant(0.0),
                           activation=Rectlin(), name="Conv2"))
        layers.append(Pooling(2, op="max", strides=2, name="neon_pool2"))
        layers.append(Conv((3, 3, 250), strides=1, init=Kaiming(), bias=Constant(0.0),
                           activation=Rectlin(), name="Conv3"))
        layers.append(Pooling(2, op="max", strides=2, name="neon_pool3"))
        layers.append(Affine(nout=200, init=Kaiming(local=False), bias=Constant(0.0),
                             activation=Rectlin(), name="neon_fc1"))
        layers.append(Affine(nout=self.class_num, init=Kaiming(local=False), bias=Constant(0.0),
                             activation=Softmax(), name="neon_fc2"))
    elif self.network_type == "resnet-56":
        layers = resnet(9, self.class_num, int(self.resize_size[0] / 4))  # 6*9 + 2 = 56
    elif self.network_type == "resnet-32":
        layers = resnet(5, self.class_num, int(self.resize_size[0] / 4))  # 6*5 + 2 = 32
    elif self.network_type == "resnet-20":
        layers = resnet(3, self.class_num, int(self.resize_size[0] / 4))  # 6*3 + 2 = 20

    return layers
def create_network():
    layers = [
        Conv((11, 11, 64), init=Gaussian(scale=0.01), bias=Constant(0),
             activation=Rectlin(), padding=3, strides=4),
        Pooling(3, strides=2),
        Conv((5, 5, 192), init=Gaussian(scale=0.01), bias=Constant(1),
             activation=Rectlin(), padding=2),
        Pooling(3, strides=2),
        Conv((3, 3, 384), init=Gaussian(scale=0.03), bias=Constant(0),
             activation=Rectlin(), padding=1),
        Conv((3, 3, 256), init=Gaussian(scale=0.03), bias=Constant(1),
             activation=Rectlin(), padding=1),
        Conv((3, 3, 256), init=Gaussian(scale=0.03), bias=Constant(1),
             activation=Rectlin(), padding=1),
        Pooling(3, strides=2),
        Affine(nout=4096, init=Gaussian(scale=0.01), bias=Constant(1), activation=Rectlin()),
        Dropout(keep=0.5),
        Affine(nout=4096, init=Gaussian(scale=0.01), bias=Constant(1), activation=Rectlin()),
        Dropout(keep=0.5),
        Affine(nout=1000, init=Gaussian(scale=0.01), bias=Constant(-7), activation=Softmax()),
    ]
    return Model(layers=layers), GeneralizedCost(costfunc=CrossEntropyMulti())
def create_model(vocab_size, rlayer_type): """ Create LSTM/GRU model for bAbI dataset. Args: vocab_size (int) : String of bAbI data. rlayer_type (string) : Type of recurrent layer to use (gru or lstm). Returns: Model : Model of the created network """ # recurrent layer parameters (default gru) rlayer_obj = GRU if rlayer_type == 'gru' else LSTM rlayer_params = dict(output_size=100, reset_cells=True, init=GlorotUniform(), init_inner=Orthonormal(0.5), activation=Tanh(), gate_activation=Logistic()) # if using lstm, swap the activation functions if rlayer_type == 'lstm': rlayer_params.update(dict(activation=Logistic(), gate_activation=Tanh())) # lookup layer parameters lookup_params = dict(vocab_size=vocab_size, embedding_dim=50, init=Uniform(-0.05, 0.05)) # Model construction story_path = [LookupTable(**lookup_params), rlayer_obj(**rlayer_params)] query_path = [LookupTable(**lookup_params), rlayer_obj(**rlayer_params)] layers = [MergeMultistream(layers=[story_path, query_path], merge="stack"), Affine(vocab_size, init=GlorotUniform(), activation=Softmax())] return Model(layers=layers)
def test_concat_l1_l1(backend_default, allrand_args):
    # test two linear layers that are merged with concat
    dtypeu = np.float32
    w_rng, rngmax = allrand_args
    # Diff size inputs and outputs
    nins = [128, 1024]
    nouts = [64, 2048]
    batch_size = 16
    NervanaObject.be.bsz = batch_size
    be = NervanaObject.be

    init_unif = Uniform(low=w_rng[0], high=w_rng[1])
    layers = [Sequential(Affine(nout=nout, init=init_unif)) for nout in nouts]
    inputs = [be.array(dtypeu(np.random.random((nin, batch_size)))) for nin in nins]
    merge = MergeMultistream(layers, merge="stack")
    assert(len(inputs) == len(layers))
    merge.configure(inputs)
    merge.allocate()
    merge.set_deltas(None)
    out = merge.fprop(inputs).get()

    sublayers = [s.layers[0] for s in layers]
    weights = [layer.W.get() for layer in sublayers]
    out_exp = np.concatenate([np.dot(w, inp.get()) for (w, inp) in zip(weights, inputs)])

    assert np.allclose(out, out_exp, atol=1e-3)

    err_lst = [dtypeu(np.random.random((nout, batch_size))) for nout in nouts]
    err_concat = np.concatenate(err_lst)
    merge.bprop(be.array(err_concat))
    dW_exp_lst = [np.dot(err, inp.get().T) for (err, inp) in zip(err_lst, inputs)]

    for layer, dW_exp in zip(sublayers, dW_exp_lst):
        assert np.allclose(layer.dW.get(), dW_exp)
    return
def create_network(stage_depth):
    # Structure of the deep residual part of the network:
    # stage_depth modules of 2 convolutional layers each at feature map depths of 16, 32, 64
    nfms = [2**(stage + 4) for stage in sorted(list(range(3)) * stage_depth)]
    strides = [1 if cur == prev else 2 for cur, prev in zip(nfms[1:], nfms[:-1])]

    # Now construct the network
    layers = [Conv(**conv_params(3, 16))]
    layers.append(module_s1(nfms[0], True))

    for nfm, stride in zip(nfms[1:], strides):
        res_module = module_s1(nfm) if stride == 1 else module_s2(nfm)
        layers.append(res_module)
    layers.append(BatchNorm())
    layers.append(Activation(Rectlin()))
    layers.append(Pooling('all', op='avg'))
    layers.append(Affine(10, init=Kaiming(local=False), batch_norm=True, activation=Softmax()))

    return Model(layers=layers), GeneralizedCost(costfunc=CrossEntropyMulti())
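# create_network() above assumes helper functions conv_params, module_s1, and module_s2 that
# are not shown in this section. A plausible conv_params, in the style of neon's CIFAR-10
# MSRA/resnet example, is sketched below; module_s1/module_s2 would assemble stride-1 and
# stride-2 residual modules from such Conv dictionaries. The exact defaults are assumptions.
def conv_params(fsize, nfm, stride=1, relu=True, batch_norm=True):
    return dict(fshape=(fsize, fsize, nfm),
                strides=stride,
                padding=(1 if fsize > 1 else 0),
                activation=(Rectlin() if relu else None),
                init=Kaiming(local=True),
                batch_norm=batch_norm)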
def __init__(self, depth=9):
    self.depth = depth

    depth = 9
    train = (3, 32, 32)

    nfms = [2**(stage + 4) for stage in sorted(list(range(3)) * depth)]
    strides = [1 if cur == prev else 2 for cur, prev in zip(nfms[1:], nfms[:-1])]

    # Now construct the network
    layers = [Conv(**self.conv_params(3, 16))]
    layers.append(self.module_s1(nfms[0], True))

    for nfm, stride in zip(nfms[1:], strides):
        res_module = self.module_s1(nfm) if stride == 1 else self.module_s2(nfm)
        layers.append(res_module)
    layers.append(BatchNorm())
    layers.append(Activation(Rectlin()))
    layers.append(Pooling('all', op='avg'))
    layers.append(Affine(10, init=Kaiming(local=False), batch_norm=True, activation=Softmax()))
    self.layers = layers

    model = Model(layers=layers)
    cost = GeneralizedCost(costfunc=CrossEntropyMulti())
    model.initialize(train, cost=cost)
    self.model = model
def test_model_get_outputs_rnn(backend_default, data):
    data_path = load_ptb_test(path=data)
    data_set = Text(time_steps=50, path=data_path)

    # weight initialization
    init = Constant(0.08)

    # model initialization
    layers = [
        Recurrent(150, init, activation=Logistic()),
        Affine(len(data_set.vocab), init, bias=init, activation=Rectlin())
    ]

    model = Model(layers=layers)
    output = model.get_outputs(data_set)

    assert output.shape == (data_set.ndata, data_set.seq_length, data_set.nclass)

    # since the init are all constant and model is un-trained:
    # along the feature dim, the values should be all the same
    assert np.allclose(output[0, 0], output[0, 0, 0], rtol=0, atol=1e-5)
    assert np.allclose(output[0, 1], output[0, 1, 0], rtol=0, atol=1e-5)

    # along the time dim, the values should be increasing:
    assert np.alltrue(output[0, 2] > output[0, 1])
    assert np.alltrue(output[0, 1] > output[0, 0])
def main_branch(branch_nodes):
    return [
        Conv((7, 7, 64), padding=3, strides=2, name='conv1/7x7_s2', **common),
        Pooling(name="pool1/3x3_s2", **pool3s2p1),
        Conv((1, 1, 64), name='conv2/3x3_reduce', **common),
        Conv((3, 3, 192), name="conv2/3x3", **commonp1),
        Pooling(name="pool2/3x3_s2", **pool3s2p1),
        inception([(64,), (96, 128), (16, 32), (32,)], name='inception_3a/'),
        inception([(128,), (128, 192), (32, 96), (64,)], name='inception_3b/'),
        Pooling(name='pool3/3x3_s2', **pool3s2p1),
        inception([(192,), (96, 208), (16, 48), (64,)], name='inception_4a/'),
        branch_nodes[0],
        inception([(160,), (112, 224), (24, 64), (64,)], name='inception_4b/'),
        inception([(128,), (128, 256), (24, 64), (64,)], name='inception_4c/'),
        inception([(112,), (144, 288), (32, 64), (64,)], name='inception_4d/'),
        branch_nodes[1],
        inception([(256,), (160, 320), (32, 128), (128,)], name='inception_4e/'),
        Pooling(name='pool4/3x3_s2', **pool3s2p1),
        inception([(256,), (160, 320), (32, 128), (128,)], name='inception_5a/'),
        inception([(384,), (192, 384), (48, 128), (128,)], name="inception_5b/"),
        Pooling(fshape=7, strides=1, op="avg", name='pool5/7x7_s1'),
        Affine(nout=1000, init=init1, activation=Softmax(), bias=Constant(0),
               name='loss3/classifier')
    ]