def __init__(self, nfm, first=False, strides=1, batch_norm=False):
    """Bottleneck residual unit: 1x1 -> 3x3 -> 1x1 (4*nfm) main path.

    Arguments:
        nfm: number of feature maps for the inner convolutions; the final
             1x1 convolution expands to 4 * nfm.
        first: True for the first unit of the network, which needs a
               projection shortcut.
        strides: stride of the first convolution (2 downsamples).
        batch_norm: enable batch normalization on the inner convolutions.
    """
    self.trunk = None
    self.side_path = None

    def preactivation():
        # Fresh pre-activation prefix, optionally batch-normalized.
        if batch_norm:
            return [BatchNorm(), Activation(Rectlin())]
        return [Activation(Rectlin())]

    bottleneck = [
        Convolution(
            **conv_params(1, nfm, strides=strides, batch_norm=batch_norm)),
        Convolution(**conv_params(3, nfm, batch_norm=batch_norm)),
        Convolution(**conv_params(1, nfm * 4, relu=False, batch_norm=False))
    ]
    if first or strides == 2:
        # Shape changes here, so the shortcut needs a 1x1 projection.
        self.side_path = Convolution(**conv_params(
            1, nfm * 4, strides=strides, relu=False, batch_norm=False))
    else:
        # Identity shortcut: fold the pre-activation into the main path.
        bottleneck = preactivation() + bottleneck
    if strides == 2:
        # Downsampling units activate the trunk before the split.
        self.trunk = Sequential(preactivation())
    self.main_path = Sequential(bottleneck)
def __init__(self, branch_units=[(192, 320), (192, 192, 192, 192)], name=None):
    """
    Fourth inception block with three branches, concatenated in the end
        1. 1x1 conv, 3x3 conv (stride=2, valid)
        2. 1x1 conv, 1x7 conv, 7x1 conv, 3x3 conv (stride=2, valid)
        3. 3x3 pool (stride=2, valid)
    Convolution(H, W, K) : height, width, number of filters
    Mixed_7a layer
    """
    b1_filters, b2_filters = branch_units

    # Branch 1: 1x1 reduction then a stride-2 valid 3x3 convolution.
    branch1 = Sequential([
        Convolution(name=name + '_br1_conv1x1',
                    **conv_params(filter_shape=(1, 1, b1_filters[0]))),
        Convolution(name=name + '_br1_conv3x3',
                    **conv_params(filter_shape=(3, 3, b1_filters[1]),
                                  strides=2, padding=0)),
    ])

    # Branch 2: 1x1, then factorized 7x7 (1x7 followed by 7x1),
    # then a stride-2 valid 3x3 convolution.
    branch2 = Sequential([
        Convolution(name=name + '_br2_conv1x1',
                    **conv_params(filter_shape=(1, 1, b2_filters[0]))),
        Convolution(name=name + '_br2_conv1x7',
                    **conv_params(filter_shape=(1, 7, b2_filters[1]),
                                  padding={'H': 0, 'W': 3, 'D': 0})),
        Convolution(name=name + '_br2_conv7x1',
                    **conv_params(filter_shape=(7, 1, b2_filters[2]),
                                  padding={'H': 3, 'W': 0, 'D': 0})),
        Convolution(name=name + '_br2_conv3x3',
                    **conv_params(filter_shape=(3, 3, b2_filters[3]),
                                  strides=2, padding=0)),
    ])

    # Branch 3: stride-2 valid max pool.
    branch3 = Pooling(name=name + '_br3_maxpool', pool_shape=(3, 3),
                      padding=0, strides=2, pool_type="max")

    super(Inceptionv3_b4, self).__init__(
        name=name, branches=[branch1, branch2, branch3], mode='concat')
def residual_block(in_channels, out_channels, kernel_size, dilation, dropout=0.2, stride=1):
    """Build one TCN residual block.

    The main path stacks two temporal blocks; a 1x1 projection shortcut is
    added only when the channel count changes.
    """
    # Main path: two stacked temporal blocks with identical settings.
    block_layers = []
    for _ in range(2):
        block_layers.extend(
            temporal_block(out_channels, kernel_size, stride, dilation,
                           dropout=dropout))
    main = Sequential(block_layers)

    # Shortcut: identity when channels match, otherwise a 1x1 projection.
    shortcut = None
    if in_channels != out_channels:
        shortcut = Sequential([
            Convolution(filter_shape=(1, out_channels),
                        filter_init=GaussianInit(0, 0.01),
                        strides=1,
                        dilation=1,
                        padding='same',
                        batch_norm=False)
        ])

    return ResidualModule(main, shortcut)
def get_mp_sp(self, num_fils, net_type, direct=True, bottleneck=False, strides=1):
    """Return (main_path, side_path) for one residual stage.

    Arguments:
        num_fils: base number of filters for the stage.
        net_type: "cifar10" (fixed two-conv path) or "i1k" (two-conv or
                  bottleneck path depending on `bottleneck`).
        direct: True -> identity shortcut (side_path is None);
                False -> 1x1 projection shortcut.
        bottleneck: use the 1-3-1 bottleneck main path (i1k only).
        strides: stride of the first convolution.

    Raises:
        NameError: if net_type is not one of the supported datasets.
    """
    if net_type == "cifar10":
        # Main path for CIFAR-10 is fixed: two 3x3 convolutions.
        main_path = Sequential([
            Convolution(**conv_params(3, num_fils, strides=strides)),
            Convolution(**conv_params(3, num_fils, activation=None))
        ])
        projection_fils = num_fils
    elif net_type == "i1k":
        # Main path for i1k depends on whether bottleneck is enabled.
        if bottleneck:
            main_path = Sequential([
                Convolution(**conv_params(1, num_fils, strides=strides)),
                Convolution(**conv_params(3, num_fils)),
                Convolution(**conv_params(1, num_fils * 4, activation=None))
            ])
            # Bottleneck expands channels 4x, so the projection must too.
            projection_fils = num_fils * 4
        else:
            main_path = Sequential([
                Convolution(**conv_params(3, num_fils, strides=strides)),
                Convolution(**conv_params(3, num_fils, activation=None))
            ])
            projection_fils = num_fils
    else:
        # NOTE(review): ValueError would be more conventional, but the
        # original exception type is preserved for any existing callers.
        raise NameError(
            "Incorrect dataset. Should be --dataset cifar10 or --dataset i1k"
        )

    # Side path: identity (None) for direct connections, otherwise a
    # strided 1x1 projection matching the main path's output channels.
    if direct:
        side_path = None
    else:
        side_path = Convolution(**conv_params(
            1, projection_fils, strides=strides, activation=None))
    return main_path, side_path
def __init__(self, branch_units=[(64, ), (48, 64), (64, 96, 96), (64, )],
             name=None):
    """
    First inception block with four branches, concatenated in the end
        1. 1x1 conv
        2. 1x1 conv, 5x5 conv
        3. 1x1 conv, 3x3conv, 3x3 conv
        4. 3x3 pool, 1x1 conv
    Convolution(H, W, K) : height, width, number of filters
    Mixed_5b, Mixed_5c, Mixed_5d layers
    """
    b1, b2, b3, b4 = branch_units

    # Branch 1: a single 1x1 convolution.
    branch1 = Convolution(name=name + '_br1_1x1conv',
                          **conv_params(filter_shape=(1, 1, b1[0])))

    # Branch 2: 1x1 reduction then a padded 5x5 convolution.
    branch2 = Sequential([
        Convolution(name=name + '_br2_1x1conv',
                    **conv_params(filter_shape=(1, 1, b2[0]))),
        Convolution(name=name + '_br2_5x5conv',
                    **conv_params(filter_shape=(5, 5, b2[1]), padding=2)),
    ])

    # Branch 3: 1x1 reduction then two padded 3x3 convolutions.
    branch3 = Sequential([
        Convolution(name=name + '_br3_1x1conv',
                    **conv_params(filter_shape=(1, 1, b3[0]))),
        Convolution(name=name + '_br3_3x3conv1',
                    **conv_params(filter_shape=(3, 3, b3[1]), padding=1)),
        Convolution(name=name + '_br3_3x3conv2',
                    **conv_params(filter_shape=(3, 3, b3[2]), padding=1)),
    ])

    # Branch 4: average pool followed by a 1x1 convolution.
    branch4 = Sequential([
        Pooling(name=name + '_br4_avgpool', pool_shape=(3, 3), padding=1,
                strides=1, pool_type="avg"),
        Convolution(name=name + '_br4_conv1x1',
                    **conv_params(filter_shape=(1, 1, b4[0]))),
    ])

    super(Inceptionv3_b1, self).__init__(
        name=name, branches=[branch1, branch2, branch3, branch4],
        mode='concat')
def create_network():
    """Define the 3D convolutional network (101-way classifier)."""
    # Weight and bias initializers.
    g1 = GaussianInit(mean=0., var=0.01)
    g5 = GaussianInit(mean=0., var=0.005)
    c0 = ConstantInit(val=0.)
    c1 = ConstantInit(val=1.)
    ax.Y.length = 101

    conv_padding = {'D': 1, 'H': 1, 'W': 1, 'C': 0}
    pool_strides = {'D': 2, 'H': 2, 'W': 2, 'C': 1}

    def conv3d(nfm, bias):
        # 3x3x3 convolution with ReLU and shared padding.
        return Convolution((3, 3, 3, nfm), padding=conv_padding,
                           filter_init=g1, bias_init=bias,
                           activation=Rectlin())

    layers = [
        conv3d(64, c0),
        # First pool keeps the depth dimension intact.
        Pooling((1, 2, 2), strides={'D': 1, 'H': 2, 'W': 2, 'C': 1}),
        conv3d(128, c1),
        Pooling((2, 2, 2), strides=pool_strides),
        conv3d(256, c1),
        Pooling((2, 2, 2), strides=pool_strides),
        conv3d(256, c1),
        Pooling((2, 2, 2), strides=pool_strides),
        conv3d(256, c1),
        Pooling((2, 2, 2), strides=pool_strides),
        # Fully-connected head with dropout.
        Affine(nout=2048, weight_init=g5, bias_init=c1, activation=Rectlin()),
        Dropout(keep=0.5),
        Affine(nout=2048, weight_init=g5, bias_init=c1, activation=Rectlin()),
        Dropout(keep=0.5),
        Affine(axes=ax.Y, weight_init=g1, bias_init=c0, activation=Softmax()),
    ]
    return Sequential(layers)
def make_discriminator():
    """Three hidden ReLU Affine layers followed by a linear scalar output."""
    def hidden_layer():
        return Affine(nout=dim, weight_init=w_init, bias_init=b_init,
                      activation=Rectlin())

    layers = [hidden_layer() for _ in range(3)]
    # Final layer: single linear output (critic score).
    layers.append(Affine(nout=1, weight_init=w_init, bias_init=b_init,
                         activation=None))
    return Sequential(layers, name="Discriminator")
def make_generator(out_axis):
    """Three hidden ReLU Affine layers followed by a linear output on out_axis."""
    def hidden_layer():
        return Affine(nout=dim, weight_init=w_init, bias_init=b_init,
                      activation=Rectlin())

    layers = [hidden_layer() for _ in range(3)]
    # Final layer: linear projection onto the requested output axis.
    layers.append(Affine(axes=out_axis, weight_init=w_init, bias_init=b_init,
                         activation=None))
    return Sequential(layers, name="Generator")
def make_discriminator_gp(bn=True, n_extra_layers=0, disc_activation=None,
                          bias_init=None):
    """Convolutional critic for gradient-penalty training.

    Arguments:
        bn: enable batch norm on the inner layers (never on first/last).
        n_extra_layers: extra stride-1 3x3 layers after the first conv.
        disc_activation: activation of the final 4x4 convolution.
        bias_init: bias initializer shared by all layers.
    """
    def conv(fshape, strides, padding, activation, batch_norm):
        return Convolution(fshape, filter_init, strides=strides,
                           padding=padding, activation=activation,
                           batch_norm=batch_norm, bias_init=bias_init)

    # First convolution: no batch norm.
    layers = [conv((4, 4, 64), 2, 1, lrelu, False)]
    # Optional extra stride-1 layers.
    for _ in range(n_extra_layers):
        layers.append(conv((3, 3, 64), 1, 1, lrelu, bn))
    # Strided stack doubling the filter count each step.
    for nfm in (128, 256, 512):
        layers.append(conv((4, 4, nfm), 2, 1, lrelu, bn))
    # Final convolution to a single output; no batch norm.
    layers.append(conv((4, 4, 1), 1, 0, disc_activation, False))
    return Sequential(layers, name="Discriminator")
def make_generator(bn=True):
    """Deconvolutional generator producing a (1, 28, 28) Tanh output."""
    # TODO
    # add affine before conv once that is corrected
    # https://github.com/NervanaSystems/private-ngraph/issues/2054

    # (filter_shape, strides, padding, deconv_out_shape) for the ReLU stack.
    specs = [
        ((1, 1, 16), 1, 0, None),
        ((3, 3, 192), 1, 0, (1, 5, 5)),
        ((3, 3, 192), 2, 0, (1, 11, 11)),
        ((3, 3, 192), 1, 0, (1, 13, 13)),
        ((3, 3, 96), 2, 0, (1, 27, 27)),
        ((3, 3, 96), 1, 0, (1, 28, 28)),
    ]
    layers = []
    for fshape, strides, padding, out_shape in specs:
        extra = {} if out_shape is None else {'deconv_out_shape': out_shape}
        layers.append(Deconvolution(fshape, filter_init, strides=strides,
                                    padding=padding, activation=relu,
                                    batch_norm=bn, **extra))
    # Output layer: single channel, Tanh, no batch norm.
    layers.append(Deconvolution((3, 3, 1), filter_init, strides=1, padding=1,
                                activation=Tanh(), batch_norm=False,
                                deconv_out_shape=(1, 28, 28)))
    return Sequential(layers, name="Generator")
def define_model(out_axes=None, celltype='RNN', recurrent_units=[32],
                 init=GlorotInit(), return_sequence=True):
    """Wrap the configured recurrent layer stack in a Sequential container."""
    return Sequential(
        define_recurrent_layers(out_axes=out_axes,
                                celltype=celltype,
                                recurrent_units=recurrent_units,
                                init=init,
                                return_sequence=return_sequence))
def test_dilated_conv(dilation):
    """Test that the dilated convolution layer output matches expected.

    This test compares the maximum output value to an expected max output
    value computed from the dilation parameter, and checks that the output
    size matches the expected size based on the dilation parameter value.
    """
    image_size = 3
    batch_size = 1
    init_val = 0.1
    conv_size = 3
    pad = 3
    N_filters = 1
    image_channels = 3
    model = Sequential([
        Convolution((conv_size, conv_size, N_filters),
                    filter_init=ConstantInit(val=init_val),
                    padding=pad,
                    dilation=dilation)
    ])
    # All-ones dummy image makes the conv output analytically predictable.
    X = np.ones(shape=(batch_size, image_channels, image_size, image_size))
    data = {'image': X, 'iteration': 1}
    data_size = OrderedDict([('N', batch_size), ('C', image_channels),
                             ('H', image_size), ('W', image_size)])
    ax = [
        ng.make_axis(length=data_size[k], name=k)
        for k in list(data_size.keys())
    ]
    p_axes = ng.make_axes(ax)
    named_inputs = {'image': ng.placeholder(p_axes)}
    outputs = model(named_inputs['image'])
    named_outputs = {outputs.name: outputs}
    with closing(ngt.make_transformer()) as transformer:
        m = make_bound_computation(transformer, named_outputs, named_inputs)
        # Run the computation once and reuse the result (the original code
        # called m(data) twice: once for values, once for keys).
        results = m(data)
        output = results[list(results.keys())[0]]
    # Effective filter size after dilation.
    filter_size = dilation * (conv_size - 1) + 1
    # Expected output spatial size for this convolution.
    out_size = (image_size + 2 * pad - filter_size) + 1
    # 1-d dilated filter footprint: taps at every dilation-th position.
    filt_tmp = np.zeros(filter_size)
    filt_tmp[0::dilation] = 1
    # Max overlap between dilated filter and image (in 1-d).
    max_overlap = int(np.min([filter_size, image_size]))
    exp_max_output = init_val * image_channels * (np.sum(
        filt_tmp[0:max_overlap]))**2
    # Expected max output changes for different dilation parameter values.
    assert int(10 * np.max(output)) == int(10 * exp_max_output), \
        ("Dilated conv max outputs do not match expected: "
         "{} != {}").format(np.max(output), exp_max_output)
    assert np.shape(output) == (batch_size, N_filters, out_size, out_size), \
        ("Dilated conv output is not expected size: "
         "{} != {}").format(np.shape(output),
                            (batch_size, N_filters, out_size, out_size))
def define_model(out_axis, filter_shapes=[5], n_filters=[32], init=KaimingInit()):
    """Stack of 1-d convolutions followed by a linear Affine readout."""
    assert len(filter_shapes) == len(n_filters)
    # One batch-normalized ReLU convolution per (shape, filters) pair.
    conv_stack = [
        Convolution(filter_shape=(width, depth),
                    filter_init=init,
                    strides=1,
                    padding="valid",
                    dilation=1,
                    activation=Rectlin(),
                    batch_norm=True)
        for width, depth in zip(filter_shapes, n_filters)
    ]
    # Linear (identity-activated) readout on the requested output axis.
    readout = Affine(weight_init=init, bias_init=init,
                     activation=Identity(), axes=out_axis)
    return Sequential(conv_stack + [readout])
def make_generator_gp(bn=True, n_extra_layers=0, bias_init=None):
    """Deconvolutional generator for gradient-penalty training.

    Arguments:
        bn: enable batch norm on the inner layers (never on the output).
        n_extra_layers: extra stride-1 3x3 conv layers before the output.
        bias_init: bias initializer shared by all layers.
    """
    def deconv(fshape, strides, padding, activation, batch_norm):
        return Deconvolution(fshape, filter_init, strides=strides,
                             padding=padding, activation=activation,
                             batch_norm=batch_norm, bias_init=bias_init)

    # Initial projection followed by a strided upsampling stack that
    # halves the filter count each step.
    layers = [deconv((4, 4, 512), 1, 0, relu, bn)]
    for nfm in (256, 128, 64):
        layers.append(deconv((4, 4, nfm), 2, 1, relu, bn))
    # Optional extra stride-1 convolution layers.
    for _ in range(n_extra_layers):
        layers.append(Convolution((3, 3, 64), filter_init, strides=1,
                                  padding=1, activation=lrelu, batch_norm=bn,
                                  bias_init=bias_init))
    # Output deconvolution: 3 channels, Tanh, no batch norm.
    layers.append(deconv((4, 4, 3), 2, 1, Tanh(), False))
    return Sequential(layers, name="Generator")
def tcn(n_features_in, hidden_sizes, kernel_size=7, dropout=0.2):
    """Build a temporal convolutional network from stacked residual blocks.

    Each level doubles the dilation factor and is followed by a ReLU.
    """
    layers = []
    prev_channels = n_features_in
    for level, channels in enumerate(hidden_sizes):
        # Dilation grows exponentially with depth: 1, 2, 4, ...
        layers.append(residual_block(prev_channels, channels, kernel_size,
                                     dilation=2 ** level, dropout=dropout))
        layers.append(Rectlin())
        prev_channels = channels
    return Sequential(layers)
def make_discriminator(bn=True, disc_activation=None):
    """All-convolutional discriminator ending in a 7x7 reduction to one output."""
    # (filter_shape, strides, padding) for the leaky-ReLU stack.
    specs = [
        ((3, 3, 96), 1, 1),
        ((3, 3, 96), 2, 1),
        ((3, 3, 192), 1, 1),
        ((3, 3, 192), 2, 1),
        ((3, 3, 192), 1, 1),
        ((1, 1, 16), 1, 0),
    ]
    layers = [
        Convolution(fshape, filter_init, strides=strides, padding=padding,
                    activation=lrelu, batch_norm=bn)
        for fshape, strides, padding in specs
    ]
    # Final 7x7 convolution to a single output; no batch norm.
    layers.append(Convolution((7, 7, 1), filter_init, strides=1, padding=0,
                              activation=disc_activation, batch_norm=False))
    return Sequential(layers, name="Discriminator")
def make_discriminator(bn=True, disc_activation=None, bias_init=None):
    """Strided convolutional discriminator with a linear Affine readout.

    NOTE(review): `disc_activation` is accepted but unused here — the final
    layer is a linear Affine; confirm this is intended.
    """
    def conv(nfm, batch_norm):
        return Convolution((4, 4, nfm), filter_init, strides=2, padding=1,
                           activation=lrelu, batch_norm=batch_norm,
                           bias_init=bias_init)

    # First convolution never uses batch norm.
    layers = [conv(128, False)]
    for nfm in (256, 512, 1024):
        layers.append(conv(nfm, bn))
    # Linear readout collapsing to a single (C, H, W) = (1, 1, 1) score.
    layers.append(
        Affine(weight_init=filter_init,
               activation=None,
               batch_norm=False,
               axes=ng.make_axes({
                   "C": 1,
                   "H": 1,
                   "W": 1
               })))
    return Sequential(layers, name="Discriminator")
def __init__(self, number_embeddings_features, tokens_in_embeddings,
             deep_parameters, deep_activation_fn, drop_out_rate=0.0):
    """Build the wide-and-deep classifier.

    Arguments:
        number_embeddings_features: embedding width per categorical feature.
        tokens_in_embeddings: vocabulary size per categorical feature.
        deep_parameters: output sizes of the deep tower's Affine layers.
        deep_activation_fn: activation used in the deep tower.
        drop_out_rate: if > 0, adds Dropout after each deep Affine layer.
    """
    super(WideDeepClassifier, self).__init__(name="WideAndDeep")

    # Embeddings: one lookup table per categorical feature.
    self.luts = []
    for e in range(len(number_embeddings_features)):
        init_uniform = UniformInit(0, 1)
        # pad_idx have to be initialize to 0 explicitly.
        self.luts.append(
            LookupTable(tokens_in_embeddings[e],
                        number_embeddings_features[e],
                        init_uniform,
                        pad_idx=0,
                        update=True))

    # Deep tower: stacked Affine layers with optional dropout after each.
    init_xavier = XavierInit()
    deep = []
    for nout in deep_parameters:
        deep.append(Affine(nout=nout, weight_init=init_xavier,
                           activation=deep_activation_fn))
        if drop_out_rate > 0.0:
            # NOTE(review): `keep=drop_out_rate` passes the *drop* rate as the
            # keep probability — confirm this is intended.
            deep.append(Dropout(keep=drop_out_rate))
    deep.append(Affine(axes=tuple(), weight_init=init_xavier))
    self.deep_layers = Sequential(deep)

    # Wide (linear) part.
    self.linear_layer = Affine(axes=tuple(), weight_init=init_xavier)
def summary(self):
    """Print a per-layer summary table: index, name, trainable and
    non-trainable variable counts, and output shape, followed by totals
    and optimizer settings.

    Raises:
        ValueError: if the model has no layers.
    """
    if self.layers is None:
        raise ValueError("Model layers not provided")

    row_fmt = "{: >20} {: >20} {: >20} {: >20} {: >20}"
    rule = "".join(100 * ["-"])

    def display_name(layer):
        # Prefer an explicit instance name; fall back to the class name.
        if 'name' in layer.__dict__:
            return layer.name
        if isinstance(layer, ResidualModule):
            return 'ResidualModule'
        return type(layer).__name__

    total_num_vars = 0
    total_num_not_trainable = 0
    print(rule)
    print(row_fmt.format("index", "name", "# trainable vars",
                         "# not trainable vars", "output_shape"))
    print(rule)
    for idx, layer in enumerate(self.layers):
        # Apply the model prefix up to this layer to discover its output axes.
        prefix = Sequential(self.layers[0:idx + 1])
        prefix_out = prefix(self.input_placeholders['X'])
        num_vars, num_not_trainable = self._get_number_of_vars_in_layer(layer)
        if num_vars is not None:
            total_num_vars += num_vars
        if num_not_trainable is not None:
            total_num_not_trainable += num_not_trainable
        if 'axes' in dir(prefix_out):
            shape_str = str(prefix_out.axes)
        else:
            shape_str = "Unknown"
        print(row_fmt.format(str(idx), display_name(layer), str(num_vars),
                             str(num_not_trainable), shape_str))
    print(rule)
    print("Total number of trainable parameters: %d" % total_num_vars)
    print("Total number of non trainable parameters: %d" %
          total_num_not_trainable)
    print(rule)
    print("Optimizer type {}".format(self.opt.name))
    print("Optimizer learning rate {}".format(
        self.opt.lrate.initial_value.item()))
    print(rule)
def make_generator(bn=True, bias_init=None):
    """Affine projection to a (1024, 4, 4) tensor followed by a stack of
    stride-2 deconvolutions ending in a 3-channel Tanh output."""
    def deconv(nfm, activation, batch_norm):
        return Deconvolution((4, 4, nfm), filter_init, strides=2, padding=1,
                             activation=activation, batch_norm=batch_norm,
                             bias_init=bias_init)

    layers = [
        # Project the input onto a (C, H, W) = (1024, 4, 4) volume.
        Affine(weight_init=filter_init,
               activation=None,
               batch_norm=False,
               axes=ng.make_axes({
                   "C": 1024,
                   "H": 4,
                   "W": 4
               })),
        deconv(512, relu, bn),
        deconv(256, relu, bn),
        deconv(128, relu, bn),
        # Output layer: 3 channels, Tanh, no batch norm.
        deconv(3, Tanh(), False),
    ]
    return Sequential(layers, name="Generator")
def __init__(self, branch_units=[(384, ), (64, 96, 96)], name=None):
    """
    Second inception block with three branches, concatenated in the end
        1. 3x3 conv (stride = 2, valid)
        2. 1x1 conv, 3x3 conv, 3x3 conv (stride=2, valid)
        3. 3x3 pool (stride = 2, valid)
    Convolution(H, W, K) : height, width, number of filters
    Mixed_6a layer
    """
    b1, b2 = branch_units

    # Branch 1: a single stride-2 valid 3x3 convolution.
    branch1 = Convolution(name=name + '_br1_3x3conv',
                          **conv_params(filter_shape=(3, 3, b1[0]),
                                        strides=2, padding=0))

    # Branch 2: 1x1 reduction, padded 3x3, then stride-2 valid 3x3.
    branch2 = Sequential([
        Convolution(name=name + '_br2_1x1conv',
                    **conv_params(filter_shape=(1, 1, b2[0]))),
        Convolution(name=name + '_br2_3x3conv1',
                    **conv_params(filter_shape=(3, 3, b2[1]), padding=1)),
        Convolution(name=name + '_br2_3x3conv2',
                    **conv_params(filter_shape=(3, 3, b2[2]),
                                  strides=2, padding=0)),
    ])

    # Branch 3: stride-2 valid max pool.
    branch3 = Pooling(pool_shape=(3, 3), padding=0, strides=2,
                      pool_type="max", name=name + '_br3_maxpool')

    super(Inceptionv3_b2, self).__init__(
        name=name, branches=[branch1, branch2, branch3], mode='concat')
def cifar_mean_subtract(x):
    # Subtract the per-channel BGR mean and rescale pixel values to [0, 1].
    # NOTE(review): assumes x.axes[0] is the channel axis — confirm against
    # the placeholder layout built below ([ax.C, ax.H, ax.W, ax.N]).
    bgr_mean = ng.persistent_tensor(axes=x.axes[0],
                                    initial_value=np.array([[104., 119., 127.]]))
    # Insert a singleton depth axis (ax.D) after normalization.
    y = ng.expand_dims((x - bgr_mean) / 255., ax.D, 1)
    return y


init_uni = UniformInit(-0.1, 0.1)

# Small CIFAR-10 convnet: two conv/pool stages followed by two Affine layers.
seq1 = Sequential([
    Preprocess(functor=cifar_mean_subtract),
    Convolution((5, 5, 16), filter_init=init_uni, activation=Rectlin()),
    Pool2D(2, strides=2),
    Convolution((5, 5, 32), filter_init=init_uni, activation=Rectlin()),
    Pool2D(2, strides=2),
    Affine(nout=500, weight_init=init_uni, activation=Rectlin()),
    Affine(axes=ax.Y, weight_init=init_uni, activation=Softmax())
])

######################
# Input specification
# Axis lengths come from the dataset image shape and the CLI batch size;
# ax.Y is the 10-way CIFAR-10 label axis.
ax.C.length, ax.H.length, ax.W.length = train_set.shapes['image']
ax.D.length = 1
ax.N.length = args.batch_size
ax.Y.length = 10

# placeholders with descriptive names
inputs = dict(image=ng.placeholder([ax.C, ax.H, ax.W, ax.N]),
              label=ng.placeholder([ax.N]))
layer_0 = LookupTable(50, 100, init, update=True, pad_idx=0) else: layer_0 = Preprocess(functor=lambda x: ng.one_hot(x, axis=ax.Y)) if args.layer_type == "rnn": rlayer = Recurrent(hidden_size, init, activation=Tanh()) elif args.layer_type == "birnn": rlayer = BiRNN(hidden_size, init, activation=Tanh(), return_sequence=True, sum_out=True) # model initialization seq1 = Sequential([ layer_0, rlayer, Affine(init, activation=Softmax(), bias_init=init, axes=(ax.Y, )) ]) optimizer = RMSProp() train_prob = seq1(inputs['inp_txt']) train_loss = ng.cross_entropy_multi(train_prob, ng.one_hot(inputs['tgt_txt'], axis=ax.Y), usebits=True) batch_cost = ng.sequential( [optimizer(train_loss), ng.mean(train_loss, out_axes=())]) train_outputs = dict(batch_cost=batch_cost) with Layer.inference_mode_on(): inference_prob = seq1(inputs['inp_txt'])
# VGG-style network: five conv/pool stages followed by a three-layer
# fully-connected classifier head.
seq1 = Sequential([
    # Stage 1: one 64-filter convolution.
    Convolution((3, 3, 64), filter_init=GaussianInit(var=0.01),
                bias_init=init, activation=Rectlin(), padding=1),
    Pool2D(2, strides=2),
    # Stage 2: one 128-filter convolution.
    Convolution((3, 3, 128), filter_init=GaussianInit(var=0.01),
                bias_init=init, activation=Rectlin(), padding=1),
    Pool2D(2, strides=2),
    # Stage 3: two 256-filter convolutions.
    Convolution((3, 3, 256), filter_init=GaussianInit(var=0.01),
                bias_init=init, activation=Rectlin(), padding=1),
    Convolution((3, 3, 256), filter_init=GaussianInit(var=0.01),
                bias_init=init, activation=Rectlin(), padding=1),
    Pool2D(2, strides=2),
    # Stage 4: two 512-filter convolutions.
    Convolution((3, 3, 512), filter_init=GaussianInit(var=0.01),
                bias_init=init, activation=Rectlin(), padding=1),
    Convolution((3, 3, 512), filter_init=GaussianInit(var=0.01),
                bias_init=init, activation=Rectlin(), padding=1),
    Pool2D(2, strides=2),
    # Stage 5: two more 512-filter convolutions.
    Convolution((3, 3, 512), filter_init=GaussianInit(var=0.01),
                bias_init=init, activation=Rectlin(), padding=1),
    Convolution((3, 3, 512), filter_init=GaussianInit(var=0.01),
                bias_init=init, activation=Rectlin(), padding=1),
    Pool2D(2, strides=2),
    # Classifier head: two 4096-unit ReLU layers and the softmax output.
    Affine(nout=4096, weight_init=GaussianInit(var=0.01), bias_init=init,
           activation=Rectlin()),
    Affine(nout=4096, weight_init=GaussianInit(var=0.01), bias_init=init,
           activation=Rectlin()),
    Affine(axes=ax.Y, weight_init=GaussianInit(var=0.01), bias_init=init,
           activation=Softmax())
])
initial_value=np.array([104., 119., 127.])) return (x - bgr_mean) / 255. init_uni = UniformInit(-0.1, 0.1) seq1 = Sequential([ Preprocess(functor=cifar_mean_subtract), Convolution((5, 5, 16), filter_init=init_uni, activation=Rectlin(), batch_norm=args.use_batch_norm), Pool2D(2, strides=2), Convolution((5, 5, 32), filter_init=init_uni, activation=Rectlin(), batch_norm=args.use_batch_norm), Pool2D(2, strides=2), Affine(nout=500, weight_init=init_uni, activation=Rectlin(), batch_norm=args.use_batch_norm), Affine(axes=ax.Y, weight_init=init_uni, activation=Softmax()) ]) optimizer = GradientDescentMomentum(0.01, 0.9) train_prob = seq1(inputs['image']) train_loss = ng.cross_entropy_multi(train_prob, ng.one_hot(inputs['label'], axis=ax.Y)) batch_cost = ng.sequential( [optimizer(train_loss),
def __init__(self, mini=False):
    """
    Builds Inception model based on:
    https://github.com/tensorflow/models/blob/master/research/slim/nets/inception_v3.py

    Arguments:
        mini: when True, builds a reduced model with 32/16 filters in each
              layer (same topology as the full model).

    The model is split into three Sequential branches:
        seq1: stem plus inception blocks up to mixed_6e.
        seq2: main classifier head (mixed_7a..7c, pooling, softmax conv).
        seq_aux: auxiliary classifier branching off after seq1.
    """
    # Input size is 299 x 299 x 3
    # Fix: layer names 'mixed_5d' and 'mixed_6a' previously carried a stray
    # leading space (' mixed_5d', ' mixed_6a'), inconsistent with every
    # other mixed_* layer name.
    if mini:
        # Mini model with reduced number of filters in each layer.
        # Root branch of the tree
        seq1 = Sequential([
            Convolution(name='conv_1a_3x3',
                        **conv_params(filter_shape=(3, 3, 32),
                                      padding=0,
                                      strides=2)),  # conv2d_1a_3x3
            Convolution(name='conv_2a_3x3',
                        **conv_params(filter_shape=(3, 3, 16),
                                      padding=0)),  # conv2d_2a_3x3
            Convolution(name='conv_2b_3x3',
                        **conv_params(filter_shape=(3, 3, 16),
                                      padding=1)),  # conv2d_2b_3x3
            Pooling(name='pool_1_3x3', pool_shape=(3, 3), padding=0,
                    strides=2, pool_type='max'),  # maxpool_3a_3x3
            Convolution(name='conv_3b_1x1',
                        **conv_params(filter_shape=(1, 1, 16))),  # conv2d_3b_1x1
            Convolution(name='conv_4a_3x3',
                        **conv_params(filter_shape=(3, 3, 32),
                                      padding=1)),  # conv2d_4a_3x3
            Pooling(name='pool_2_3x3', pool_shape=(3, 3), padding=0,
                    strides=2, pool_type='max'),  # maxpool_5a_3x3
            Inceptionv3_b1([(32, ), (32, 32), (32, 32, 32), (32, )],
                           name='mixed_5b'),
            Inceptionv3_b1([(32, ), (32, 32), (32, 32, 32), (32, )],
                           name='mixed_5c'),
            Inceptionv3_b1([(32, ), (32, 32), (32, 32, 32), (32, )],
                           name='mixed_5d'),
            Inceptionv3_b2([(32, ), (32, 32, 32)], name='mixed_6a'),
            Inceptionv3_b3([(32, ), (32, 32, 32), (32, 32, 32, 32, 32),
                            (32, )], name='mixed_6b'),
            Inceptionv3_b3([(32, ), (32, 32, 32), (32, 32, 32, 32, 32),
                            (32, )], name='mixed_6c'),
            Inceptionv3_b3([(32, ), (32, 32, 32), (32, 32, 32, 32, 32),
                            (32, )], name='mixed_6d'),
            Inceptionv3_b3([(32, ), (32, 32, 32), (32, 32, 32, 32, 32),
                            (32, )], name='mixed_6e')
        ])
        # Branch of main classifier
        seq2 = Sequential([
            Inceptionv3_b4([(32, 32), (32, 32, 32, 32)], name='mixed_7a'),
            Inceptionv3_b5([(32, ), (32, 32, 32), (32, 32, 32, 32), (32, )],
                           name='mixed_7b'),
            Inceptionv3_b5([(32, ), (32, 32, 32), (32, 32, 32, 32), (32, )],
                           name='mixed_7c'),
            Pooling(pool_shape=(8, 8), padding=0, strides=2,
                    pool_type='avg'),  # Last Avg Pool
            Dropout(keep=0.8),
            Convolution(name='main_final_conv1x1',
                        **conv_params(filter_shape=(1, 1, 1000),
                                      activation=Softmax(),
                                      batch_norm=False))
        ])
        # Auxiliary classifier
        seq_aux = Sequential([
            Pooling(pool_shape=(5, 5), padding=0, strides=3,
                    pool_type='avg'),
            Convolution(name='aux_conv1x1_v1',
                        **conv_params(filter_shape=(1, 1, 32))),
            Convolution(name='aux_conv5x5',
                        **conv_params(filter_shape=(5, 5, 32))),
            Convolution(name='aux_conv1x1_v2',
                        **conv_params(filter_shape=(1, 1, 1000),
                                      activation=Softmax(),
                                      batch_norm=False))
        ])
    else:
        # Root branch of the tree
        seq1 = Sequential([
            Convolution(name='conv_1a_3x3',
                        **conv_params(filter_shape=(3, 3, 32),
                                      padding=0,
                                      strides=2)),  # conv2d_1a_3x3
            Convolution(name='conv_2a_3x3',
                        **conv_params(filter_shape=(3, 3, 32),
                                      padding=0)),  # conv2d_2a_3x3
            Convolution(name='conv_2b_3x3',
                        **conv_params(filter_shape=(3, 3, 64),
                                      padding=1)),  # conv2d_2b_3x3
            Pooling(name='pool_1_3x3', pool_shape=(3, 3), padding=0,
                    strides=2, pool_type='max'),  # maxpool_3a_3x3
            Convolution(name='conv_3b_1x1',
                        **conv_params(filter_shape=(1, 1, 80))),  # conv2d_3b_1x1
            Convolution(name='conv_4a_3x3',
                        **conv_params(filter_shape=(3, 3, 192),
                                      padding=1)),  # conv2d_4a_3x3
            Pooling(name='pool_2_3x3', pool_shape=(3, 3), padding=0,
                    strides=2, pool_type='max'),  # maxpool_5a_3x3
            Inceptionv3_b1([(64, ), (48, 64), (64, 96, 96), (32, )],
                           name='mixed_5b'),
            Inceptionv3_b1([(64, ), (48, 64), (64, 96, 96), (64, )],
                           name='mixed_5c'),
            Inceptionv3_b1([(64, ), (48, 64), (64, 96, 96), (64, )],
                           name='mixed_5d'),
            Inceptionv3_b2([(384, ), (64, 96, 96)], name='mixed_6a'),
            Inceptionv3_b3([(192, ), (128, 128, 192),
                            (128, 128, 128, 128, 192), (192, )],
                           name='mixed_6b'),
            Inceptionv3_b3([(192, ), (160, 160, 192),
                            (160, 160, 160, 160, 192), (192, )],
                           name='mixed_6c'),
            Inceptionv3_b3([(192, ), (160, 160, 192),
                            (160, 160, 160, 160, 192), (192, )],
                           name='mixed_6d'),
            Inceptionv3_b3([(192, ), (192, 192, 192),
                            (192, 192, 192, 192, 192), (192, )],
                           name='mixed_6e')
        ])
        # Branch of main classifier
        seq2 = Sequential([
            Inceptionv3_b4([(192, 320), (192, 192, 192, 192)],
                           name='mixed_7a'),
            Inceptionv3_b5([(320, ), (384, 384, 384), (448, 384, 384, 384),
                            (192, )], name='mixed_7b'),
            Inceptionv3_b5([(320, ), (384, 384, 384), (448, 384, 384, 384),
                            (192, )], name='mixed_7c'),
            Pooling(pool_shape=(8, 8), padding=0, strides=2,
                    pool_type='avg'),  # Last Avg Pool
            Dropout(keep=0.8),
            Convolution(name='main_final_conv1x1',
                        **conv_params(filter_shape=(1, 1, 1000),
                                      activation=Softmax(),
                                      batch_norm=False))
        ])
        # Auxiliary classifier
        seq_aux = Sequential([
            Pooling(pool_shape=(5, 5), padding=0, strides=3,
                    pool_type='avg'),
            Convolution(name='aux_conv1x1_v1',
                        **conv_params(filter_shape=(1, 1, 128))),
            Convolution(name='aux_conv5x5',
                        **conv_params(filter_shape=(5, 5, 768))),
            Convolution(name='aux_conv1x1_v2',
                        **conv_params(filter_shape=(1, 1, 1000),
                                      activation=Softmax(),
                                      batch_norm=False))
        ])
    self.seq1 = seq1
    self.seq2 = seq2
    self.seq_aux = seq_aux
init, activation=Tanh(), reset_cells=True, return_sequence=False) else: rlayer = BiRNN(hidden_size, init, activation=Tanh(), reset_cells=True, return_sequence=False, sum_out=True) # model initialization seq1 = Sequential([ LookupTable(vocab_size, embed_size, init, update=True, pad_idx=pad_idx), rlayer, Affine(init, activation=Softmax(), bias_init=init, axes=(ax.Y, )) ]) optimizer = RMSProp(decay_rate=0.95, learning_rate=2e-3, epsilon=1e-6, gradient_clip_value=gradient_clip_value) train_prob = seq1(inputs['review']) train_loss = ng.cross_entropy_multi(train_prob, ng.one_hot(inputs['label'], axis=ax.Y), usebits=True) batch_cost = ng.sequential( [optimizer(train_loss), ng.mean(train_loss, out_axes=())])
# Two stacked LSTM layers when requested on the command line.
if args.layer_type == "lstm":
    rlayer1 = LSTM(hidden_size, init, activation=Tanh(),
                   gate_activation=Logistic(), return_sequence=True)
    rlayer2 = LSTM(hidden_size, init, activation=Tanh(),
                   gate_activation=Logistic(), return_sequence=True)

# model initialization
# One-hot encode the input, run both recurrent layers, then project onto
# the output vocabulary with a softmax Affine layer.
seq1 = Sequential([
    Preprocess(functor=expand_onehot), rlayer1, rlayer2,
    Affine(init, activation=Softmax(), bias_init=init, axes=(ax.Y, ))
])

optimizer = RMSProp(gradient_clip_value=gradient_clip_value)

# Training graph: cross-entropy loss on the softmax output, mean cost
# evaluated after the optimizer update.
train_prob = seq1(inputs['inp_txt'])
train_loss = ng.cross_entropy_multi(train_prob,
                                    ng.one_hot(inputs['tgt_txt'], axis=ax.Y),
                                    usebits=True)
batch_cost = ng.sequential(
    [optimizer(train_loss), ng.mean(train_loss, out_axes=())])
train_outputs = dict(batch_cost=batch_cost)

# Inference graph reuses the same model with inference-mode layers
# (e.g. dropout disabled).
with Layer.inference_mode_on():
    inference_prob = seq1(inputs['inp_txt'])
inputs = train_set.make_placeholders()
# CIFAR-10 has 10 classes.
ax.Y.length = 10

######################
# Model specification


def cifar_mean_subtract(x):
    # Subtract the per-channel BGR mean and rescale pixels to [0, 1].
    bgr_mean = ng.persistent_tensor(
        axes=x.axes.find_by_name('C'),
        initial_value=np.array([104., 119., 127.]))
    return (x - bgr_mean) / 255.


# Simple MLP baseline: mean subtraction, one 200-unit hidden layer,
# softmax output.
seq1 = Sequential([Preprocess(functor=cifar_mean_subtract),
                   Affine(nout=200, weight_init=UniformInit(-0.1, 0.1),
                          activation=Rectlin()),
                   Affine(axes=ax.Y, weight_init=UniformInit(-0.1, 0.1),
                          activation=Softmax())])

optimizer = GradientDescentMomentum(0.1, 0.9)

# Training graph: cross-entropy loss, mean cost after the optimizer step.
train_prob = seq1(inputs['image'])
train_loss = ng.cross_entropy_multi(train_prob,
                                    ng.one_hot(inputs['label'], axis=ax.Y))
batch_cost = ng.sequential([optimizer(train_loss),
                            ng.mean(train_loss, out_axes=())])
train_outputs = dict(batch_cost=batch_cost)

# Evaluation graph in inference mode: misclassification indicator plus
# the evaluation cross-entropy loss.
with Layer.inference_mode_on():
    inference_prob = seq1(inputs['image'])
errors = ng.not_equal(ng.argmax(inference_prob, out_axes=[ax.N]),
                      inputs['label'])
eval_loss = ng.cross_entropy_multi(inference_prob,
                                   ng.one_hot(inputs['label'], axis=ax.Y))
eval_outputs = dict(cross_ent_loss=eval_loss, misclass_pct=errors)

# Now bind the computations we are interested in
Deconvolution((3, 3, 96), filter_init, strides=1, padding=0, activation=relu, batch_norm=True, deconv_out_shape=(1, 28, 28)), Deconvolution((3, 3, 1), filter_init, strides=1, padding=1, activation=Tanh(), batch_norm=False, deconv_out_shape=(1, 28, 28)) ] generator = Sequential(deconv_layers, name="Generator") # discriminator network lrelu = Rectlin(slope=0.1) conv_layers = [ Convolution((3, 3, 96), filter_init, strides=1, padding=1, activation=lrelu, batch_norm=True), Convolution((3, 3, 96), filter_init, strides=2, padding=1,