def __init__(self, depth=9):
    self.depth = depth

    train = (3, 32, 32)  # input shape (channels, height, width)

    nfms = [2**(stage + 4) for stage in sorted(list(range(3)) * depth)]
    strides = [1 if cur == prev else 2
               for cur, prev in zip(nfms[1:], nfms[:-1])]

    # Now construct the network
    layers = [Conv(**self.conv_params(3, 16))]
    layers.append(self.module_s1(nfms[0], True))

    for nfm, stride in zip(nfms[1:], strides):
        res_module = self.module_s1(nfm) if stride == 1 else self.module_s2(nfm)
        layers.append(res_module)

    layers.append(BatchNorm())
    layers.append(Activation(Rectlin()))
    layers.append(Pooling('all', op='avg'))
    layers.append(Affine(10, init=Kaiming(local=False),
                         batch_norm=True, activation=Softmax()))
    self.layers = layers

    model = Model(layers=layers)
    cost = GeneralizedCost(costfunc=CrossEntropyMulti())
    model.initialize(train, cost=cost)
    self.model = model
def deconv_layer(name, n_feature, ker_size=4, strides=2, padding=1,
                 activation=lrelu, batch_norm=True, bias=None):
    """
    Layer configuration for deep-convolutional (DC) discriminator

    Arguments:
        name (string): Layer name
        n_feature (int): Number of output feature maps
        ker_size (int): Size of convolutional kernel (defaults to 4)
        strides (int): Stride of convolution (defaults to 2)
        padding (int): Padding of convolution (defaults to 1)
        activation (object): Activation function (defaults to leaky ReLU)
        batch_norm (bool): Enable batch normalization (defaults to True)
        bias (object): Append a bias layer when not None (defaults to None)
    """
    layers = []
    layers.append(Deconvolution(fshape=(ker_size, ker_size, n_feature),
                                strides=strides, padding=padding, dilation={},
                                init=init_w, bsum=batch_norm, name=name))
    if batch_norm:
        layers.append(BatchNorm(name=name + '_bnorm', **bn_prm))
    if bias is not None:
        layers.append(Bias(init=None, name=name + '_bias'))
    layers.append(Activation(transform=activation, name=name + '_rectlin'))
    return layers
def create_network(stage_depth):
    # Structure of the deep residual part of the network:
    # stage_depth modules of 2 convolutional layers each at feature map
    # depths of 16, 32, 64
    nfms = [2**(stage + 4) for stage in sorted(list(range(3)) * stage_depth)]
    strides = [1 if cur == prev else 2
               for cur, prev in zip(nfms[1:], nfms[:-1])]

    # Now construct the network
    layers = [Conv(**conv_params(3, 16))]
    layers.append(module_s1(nfms[0], True))

    for nfm, stride in zip(nfms[1:], strides):
        res_module = module_s1(nfm) if stride == 1 else module_s2(nfm)
        layers.append(res_module)

    layers.append(BatchNorm())
    layers.append(Activation(Rectlin()))
    layers.append(Pooling('all', op='avg'))
    layers.append(Affine(10, init=Kaiming(local=False),
                         batch_norm=True, activation=Softmax()))

    return Model(layers=layers), GeneralizedCost(costfunc=CrossEntropyMulti())
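# Hedged usage sketch (not from the source above): how the model and cost
# returned by create_network might be trained with neon. The backend settings,
# the load_cifar10_data() helper, and the optimizer hyperparameters are
# illustrative assumptions, and the Callbacks constructor arguments vary
# between neon versions.
from neon.backends import gen_backend
from neon.data import DataIterator
from neon.optimizers import GradientDescentMomentum
from neon.callbacks.callbacks import Callbacks

be = gen_backend(backend='cpu', batch_size=128)

# hypothetical helper returning flattened (N, 3072) CIFAR-10 images and labels
X_train, y_train = load_cifar10_data()
train_set = DataIterator(X_train, y_train, nclass=10, lshape=(3, 32, 32))

model, cost = create_network(stage_depth=9)
opt = GradientDescentMomentum(learning_rate=0.1, momentum_coef=0.9,
                              wdecay=0.0001)
callbacks = Callbacks(model, train_set)
model.fit(train_set, optimizer=opt, num_epochs=10, cost=cost,
          callbacks=callbacks)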
def module_s1(nfm, first=False):
    '''
    non-strided
    '''
    sidepath = Conv(**conv_params(1, nfm * 4, 1, False, False)) if first \
        else SkipNode()
    mainpath = [] if first else [BatchNorm(), Activation(Rectlin())]
    mainpath.append(Conv(**conv_params(1, nfm)))
    mainpath.append(Conv(**conv_params(3, nfm)))
    mainpath.append(Conv(**conv_params(1, nfm * 4, relu=False,
                                       batch_norm=False)))

    return MergeSum([sidepath, mainpath])
def module_s2(nfm):
    '''
    strided
    '''
    module = [BatchNorm(), Activation(Rectlin())]
    mainpath = [Conv(**conv_params(1, nfm, stride=2)),
                Conv(**conv_params(3, nfm)),
                Conv(**conv_params(1, nfm * 4, relu=False, batch_norm=False))]
    sidepath = [Conv(**conv_params(1, nfm * 4, stride=2, relu=False,
                                   batch_norm=False))]

    module.append(MergeSum([sidepath, mainpath]))
    return module
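# The residual helpers above unpack Conv(**conv_params(...)), but conv_params
# itself is not shown in this section. A minimal sketch of what it is assumed
# to look like, consistent with the positional call
# conv_params(1, nfm * 4, 1, False, False) in module_s1:
def conv_params(fsize, nfm, stride=1, relu=True, batch_norm=True):
    return dict(fshape=(fsize, fsize, nfm),
                strides=stride,
                padding=(1 if fsize > 1 else 0),
                activation=(Rectlin() if relu else None),
                init=Kaiming(local=True),
                batch_norm=batch_norm)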
def mlp_layer(name, nout, activation=relu, batch_norm=False, bias=None):
    """
    Layer configuration for MLP generator/discriminator

    Arguments:
        name (string): Layer name
        nout (int): Number of output feature maps
        activation (object): Activation function (defaults to ReLU)
        batch_norm (bool): Enable batch normalization (defaults to False)
        bias (object): Append a bias layer when not None (defaults to None)
    """
    layers = []
    layers.append(Linear(nout=nout, init=init_w, bsum=batch_norm, name=name))
    if batch_norm:
        layers.append(BatchNorm(name=name + '_bnorm', **bn_prm))
    if bias is not None:
        layers.append(Bias(init=None, name=name + '_bias'))
    layers.append(Activation(transform=activation, name=name + '_rectlin'))
    return layers
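# deconv_layer and mlp_layer above reference module-level globals that are not
# shown in this section. Assumed, illustrative definitions (the exact values
# are guesses, not from the source):
init_w = Gaussian(scale=0.02)   # shared weight initializer
bn_prm = dict(rho=0.9)          # extra keyword arguments passed to BatchNorm
relu = Rectlin()                # standard ReLU activation
lrelu = Rectlin(slope=0.1)      # leaky ReLU activation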
def test_model_serialize(backend):
    (X_train, y_train), (X_test, y_test), nclass = load_mnist()
    train_set = DataIterator([X_train, X_train], y_train,
                             nclass=nclass, lshape=(1, 28, 28))

    init_norm = Gaussian(loc=0.0, scale=0.01)

    # initialize model
    path1 = [Conv((5, 5, 16), init=init_norm, bias=Constant(0),
                  activation=Rectlin()),
             Pooling(2),
             Affine(nout=20, init=init_norm, bias=init_norm,
                    activation=Rectlin())]
    path2 = [Dropout(keep=0.5),
             Affine(nout=20, init=init_norm, bias=init_norm,
                    activation=Rectlin())]
    layers = [MergeConcat([path1, path2]),
              Affine(nout=20, init=init_norm, bias=init_norm,
                     activation=Rectlin()),
              BatchNorm(),
              Affine(nout=10, init=init_norm,
                     activation=Logistic(shortcut=True))]

    tmp_save = 'test_model_serialize_tmp_save.pickle'
    mlp = Model(layers=layers)
    mlp.optimizer = GradientDescentMomentum(learning_rate=0.1,
                                            momentum_coef=0.9)
    mlp.cost = GeneralizedCost(costfunc=CrossEntropyBinary())

    n_test = 3
    num_epochs = 3

    # Train model for num_epochs and n_test batches
    for epoch in range(num_epochs):
        for i, (x, t) in enumerate(train_set):
            x = mlp.fprop(x)
            delta = mlp.cost.get_errors(x, t)
            mlp.bprop(delta)
            mlp.optimizer.optimize(mlp.layers_to_optimize, epoch=epoch)
            if i > n_test:
                break

    # Get expected outputs of n_test batches and states of all layers
    outputs_exp = []
    pdicts_exp = [l.get_params_serialize() for l in mlp.layers_to_optimize]
    for i, (x, t) in enumerate(train_set):
        outputs_exp.append(mlp.fprop(x, inference=True))
        if i > n_test:
            break

    # Serialize model
    save_obj(mlp.serialize(keep_states=True), tmp_save)

    # Load model
    mlp = Model(layers=layers)
    mlp.load_weights(tmp_save)

    outputs = []
    pdicts = [l.get_params_serialize() for l in mlp.layers_to_optimize]
    for i, (x, t) in enumerate(train_set):
        outputs.append(mlp.fprop(x, inference=True))
        if i > n_test:
            break

    # Check outputs, states, and params are the same
    for output, output_exp in zip(outputs, outputs_exp):
        assert np.allclose(output.get(), output_exp.get())

    for pd, pd_exp in zip(pdicts, pdicts_exp):
        for s, s_e in zip(pd['states'], pd_exp['states']):
            if isinstance(s, list):  # this is the batch norm case
                for _s, _s_e in zip(s, s_e):
                    assert np.allclose(_s, _s_e)
            else:
                assert np.allclose(s, s_e)
        for p, p_e in zip(pd['params'], pd_exp['params']):
            if isinstance(p, list):  # this is the batch norm case
                for _p, _p_e in zip(p, p_e):
                    assert np.allclose(_p, _p_e)
            else:
                assert np.allclose(p, p_e)

    os.remove(tmp_save)
# setup backend (assumed gen_backend call; the original snippet begins
# mid-argument list)
be = gen_backend(backend=args.backend,
                 batch_size=batch_size,
                 rng_seed=args.rng_seed,
                 device_id=args.device_id,
                 default_dtype=args.datatype)

(X_train, y_train), (X_test, y_test), nclass = load_mnist(path=args.data_dir)
train_set = DataIterator(X_train, y_train, nclass=nclass)
valid_set = DataIterator(X_test, y_test, nclass=nclass)

# weight initialization
init_norm = Gaussian(loc=0.0, scale=0.01)

# initialize model
layers = []
layers.append(Affine(nout=100, init=init_norm, activation=Rectlin()))
layers.append(BatchNorm())
layers.append(Affine(nout=10, init=init_norm,
                     activation=Logistic(shortcut=True)))

cost = GeneralizedCost(costfunc=CrossEntropyBinary())

mlp = Model(layers=layers)


# define stopping function
# it takes as input a tuple (State, val[t])
# which describes the cumulative validation state (generated by this function)
# and the validation error at time t
# and returns as output a tuple (State', Bool),
# which represents the new state and whether to stop
def stopFunc(s, v):
    # Stop if validation error ever increases from epoch to epoch
    # (one simple implementation of the contract described above: s is None on
    # the first call, afterwards it holds the best validation error so far)
    if s is None:
        return (v, False)
    return (min(v, s), v > s)
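# Hedged usage sketch: attaching stopFunc as an early-stopping criterion via
# neon's callback mechanism. The optimizer settings and epoch count are
# illustrative, and the exact Callbacks constructor arguments differ between
# neon versions.
optimizer = GradientDescentMomentum(learning_rate=0.1, momentum_coef=0.9)
callbacks = Callbacks(mlp, train_set, valid_set=valid_set)
callbacks.add_early_stop_callback(stopFunc)
mlp.fit(train_set, optimizer=optimizer, num_epochs=20, cost=cost,
        callbacks=callbacks)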
init = Gaussian(scale=0.01)

# discriminator using convolution layers
lrelu = Rectlin(slope=0.1)  # leaky relu for discriminator
# sigmoid = Logistic()  # sigmoid activation function
conv1 = dict(init=init, batch_norm=False, activation=lrelu, bias=init)
conv2 = dict(init=init, batch_norm=False, activation=lrelu, padding=2,
             bias=init)
conv3 = dict(init=init, batch_norm=False, activation=lrelu, padding=1,
             bias=init)

b1 = BranchNode("b1")
b2 = BranchNode("b2")
branch1 = [b1,
           Conv((5, 5, 5, 32), **conv1),
           Dropout(keep=0.8),
           Conv((5, 5, 5, 8), **conv2),
           BatchNorm(),
           Dropout(keep=0.8),
           Conv((5, 5, 5, 8), **conv2),
           BatchNorm(),
           Dropout(keep=0.8),
           Conv((5, 5, 5, 8), **conv3),
           BatchNorm(),
           Dropout(keep=0.8),
           Pooling((2, 2, 2)),
           Affine(1024, init=init, activation=lrelu),
           BatchNorm(),
           Affine(1024, init=init, activation=lrelu),
           BatchNorm(),
           b2,
           Affine(nout=1, init=init, bias=init, activation=Logistic())]  # real/fake
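# The BranchNode markers (b1, b2) only matter if additional output branches are
# attached to them. A hedged sketch of one way this discriminator could expose
# an extra output at b2 with neon's Tree container; the auxiliary branch and
# its size are hypothetical, not from the source:
aux_branch = [b2,
              Affine(nout=16, init=init, bias=init, activation=Rectlin())]
discriminator = Model(layers=Tree([branch1, aux_branch], alphas=[1.0, 1.0]))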