# Script preamble: parse CLI args, enable fast Winograd convolutions on the
# backend, and build a synthetic 231x231 RGB dataset with the first layers of
# an Overfeat/AlexNet-style conv stack (chunk is cut off mid layer list).
import numpy as np

parser = NeonArgparser(__doc__)
args = parser.parse_args()

# NOTE(review): presumably selects the 4x4-tile Winograd kernel variant —
# confirm against the backend's enable_winograd documentation.
NervanaObject.be.enable_winograd = 4

# setup data provider: 128 random images with random 1000-way targets
X_train = np.random.uniform(-1, 1, (128, 3 * 231 * 231))
y_train = np.random.randint(0, 999, (128, 1000))
train = ArrayIterator(X_train, y_train, nclass=1000, lshape=(3, 231, 231))

layers = [
    Conv((11, 11, 96), init=Gaussian(scale=0.01), activation=Rectlin(), padding=0, strides=4),
    Pooling(2, strides=2),
    Conv((5, 5, 256), init=Gaussian(scale=0.01), activation=Rectlin(), padding=0),
    Pooling(2, strides=2),
    Conv((3, 3, 512), init=Gaussian(scale=0.01), activation=Rectlin(), padding=1),
    Conv((3, 3, 1024), init=Gaussian(scale=0.01), activation=Rectlin(),
def test_reshape_layer_model(backend_default, fargs):
    """
    Build several small topologies that mix Reshape with conv, recurrent and
    lookup-table layers, and run one fprop/bprop pass through each:
    - conv before RNNs
    - conv after RNNs
    - conv after LUT
    """
    np.random.seed(seed=0)

    nin, nout, bsz = fargs
    be = backend_default
    be.bsz = bsz
    input_size = (nin, be.bsz)

    w_init = Uniform(-0.1, 0.1)
    glorot = GlorotUniform()

    # host-side random input / error signals, pushed onto device tensors
    x_host = np.random.rand(nin, be.bsz)
    err_host = np.random.rand(nout, be.bsz)
    x_dev = be.array(x_host)
    err_dev = be.array(err_host)

    topologies = [
        # LUT -> Reshape -> Conv -> LSTM -> sum -> Affine
        [
            LookupTable(vocab_size=2000, embedding_dim=400, init=w_init),
            Reshape(reshape=(4, 100, -1)),
            Conv((3, 3, 16), init=w_init),
            LSTM(64, glorot, activation=Tanh(),
                 gate_activation=Logistic(), reset_cells=True),
            RecurrentSum(),
            Affine(nout, w_init, bias=w_init, activation=Softmax()),
        ],
        # LUT -> Reshape -> Conv -> Pooling -> Affine
        [
            LookupTable(vocab_size=1000, embedding_dim=400, init=w_init),
            Reshape(reshape=(4, 50, -1)),
            Conv((3, 3, 16), init=w_init),
            Pooling(2, strides=2),
            Affine(nout=nout, init=w_init, bias=w_init, activation=Softmax()),
        ],
        # LUT -> LSTM -> Reshape -> Conv -> Affine
        [
            LookupTable(vocab_size=2000, embedding_dim=400, init=w_init),
            LSTM(64, glorot, activation=Tanh(),
                 gate_activation=Logistic(), reset_cells=True),
            Reshape(reshape=(4, 32, -1)),
            Conv((3, 3, 16), init=w_init),
            Affine(nout, w_init, bias=w_init, activation=Softmax()),
        ],
        # LUT -> simple Recurrent -> Reshape -> Conv -> Affine
        [
            LookupTable(vocab_size=2000, embedding_dim=400, init=w_init),
            Recurrent(64, glorot, activation=Tanh(), reset_cells=True),
            Reshape(reshape=(4, -1, 32)),
            Conv((3, 3, 16), init=w_init),
            Affine(nout, w_init, bias=w_init, activation=Softmax()),
        ],
        # LUT -> sum over time -> Affine
        [
            LookupTable(vocab_size=1000, embedding_dim=128, init=w_init),
            RecurrentSum(),
            Affine(nout=nout, init=w_init, bias=w_init, activation=Softmax()),
        ],
        # LUT -> bidirectional RNN -> Reshape -> Conv -> Affine
        [
            LookupTable(vocab_size=1000, embedding_dim=200, init=w_init),
            DeepBiRNN(32, init=GlorotUniform(), batch_norm=True,
                      activation=Tanh(), reset_cells=True, depth=1),
            Reshape((4, 32, -1)),
            Conv((3, 3, 16), init=w_init),
            Affine(nout=nout, init=w_init, bias=w_init, activation=Softmax()),
        ],
    ]

    for topology in topologies:
        model = Model(layers=topology)
        cost = GeneralizedCost(costfunc=CrossEntropyBinary())
        model.initialize(input_size, cost)
        model.fprop(x_dev)
        model.bprop(err_dev)
def add_vgg_layers():
    """Build the VGG-16 convolutional trunk.

    Returns the list of 3x3 conv stacks (64-64, 128-128, 256x3, 512x3,
    512x3) with 2x2 max pooling between stacks. The final pooling and
    fully-connected classifier layers are intentionally omitted — this trunk
    is meant to be used as a feature extractor.
    """
    conv_params = {
        'strides': 1,
        'padding': 1,
        'init': Xavier(local=True),
        'bias': Constant(0),
        'activation': Rectlin(),
    }

    # (feature maps, number of 3x3 convs) per stack
    stacks = [(64, 2), (128, 2), (256, 3), (512, 3), (512, 3)]

    vgg_layers = []
    for stack_idx, (nfm, depth) in enumerate(stacks):
        for _ in range(depth):
            vgg_layers.append(Conv((3, 3, nfm), **conv_params))
        # pool between stacks, but not after the last one
        if stack_idx < len(stacks) - 1:
            vgg_layers.append(Pooling(2, strides=2))
    return vgg_layers
# Validation/test splits drawn from the same array as the training data.
eval_set = ArrayIterator(X_train[1000:2000], y_train[1000:2000],
                         make_onehot=False, lshape=(3, 256, 256))
test_set = ArrayIterator(X_train[2000:2500], y_train[2000:2500],
                         make_onehot=False, lshape=(3, 256, 256))

# weight initialization
init_norm = Gaussian(loc=0.0, scale=0.01)

# setup model layers
layers = [
    Conv((5, 5, 16), init=init_norm, activation=Rectlin()),
    Pooling(2),
    Conv((5, 5, 32), init=init_norm, activation=Rectlin()),
    Pooling(2),
    Conv((3, 3, 32), init=init_norm, activation=Rectlin()),
    Pooling(2),
    Affine(nout=100, init=init_norm, activation=Rectlin()),
    # 4 linear outputs: regression head, paired with the SumSquared cost below
    Linear(nout=4, init=init_norm)
]

model = Model(layers=layers)

# cost = GeneralizedCost(costfunc=CrossEntropyBinary())
cost = GeneralizedCost(costfunc=SumSquared())

# fit and validate
optimizer = RMSProp()
# Validation image loader plus the first two AlexNet-style conv/pool/LRN
# stages (chunk is cut off after pool2).
test = ImageLoader(set_name='validation', scale_range=(256, 256),
                   do_transforms=False, **img_set_options)

init_g1 = Gaussian(scale=0.01)
init_g2 = Gaussian(scale=0.005)
relu = Rectlin()

layers = []
layers.append(Conv((11, 11, 96), padding=0, strides=4, init=init_g1,
                   bias=Constant(0), activation=relu, name='conv1'))
layers.append(Pooling(3, strides=2, name='pool1'))
# local response normalization over 5 adjacent feature maps
layers.append(LRN(5, ascale=0.0001, bpower=0.75, name='norm1'))
layers.append(Conv((5, 5, 256), padding=2, init=init_g1,
                   bias=Constant(1.0), activation=relu, name='conv2'))
layers.append(Pooling(3, strides=2, name='pool2'))
# Binary classifier: conv front-end into a deep bidirectional RNN, trained
# with Adagrad. The 'tain' spelling is kept as-is — it presumably matches the
# on-disk set/index names (confirm against the dataset's index files).
common = dict(target_size=1, nclasses=2)
tain_set = 'full' if args.test_mode else 'tain'
test_set = 'test' if args.test_mode else 'eval'
test_dir = data_dir.replace('train', 'test') if args.test_mode else data_dir
tain = DataLoader(set_name=tain_set, media_params=tain_params,
                  index_file=tain_idx, repo_dir=data_dir, **common)
test = DataLoader(set_name=test_set, media_params=test_params,
                  index_file=test_idx, repo_dir=test_dir, **common)

gauss = Gaussian(scale=0.01)
glorot = GlorotUniform()
# per-dimension stride configs: height stride is always 1; width stride
# shrinks the (wide) time axis aggressively in early layers
tiny = dict(str_h=1, str_w=1)
small = dict(str_h=1, str_w=2)
big = dict(str_h=1, str_w=4)
# NOTE: rebinds 'common' (previously the DataLoader kwargs) to layer kwargs
common = dict(batch_norm=True, activation=Rectlin())
layers = [Conv((3, 5, 64), init=gauss, activation=Rectlin(), strides=big),
          Pooling(2, strides=2),
          Conv((3, 3, 128), init=gauss, strides=small, **common),
          Pooling(2, strides=2),
          Conv((3, 3, 256), init=gauss, strides=small, **common),
          Conv((2, 2, 512), init=gauss, strides=tiny, **common),
          DeepBiRNN(128, init=glorot, reset_cells=True, depth=3, **common),
          RecurrentMean(),
          Affine(nout=2, init=gauss, activation=Softmax())]

model = Model(layers=layers)
opt = Adagrad(learning_rate=0.0001)
callbacks = Callbacks(model, eval_set=test, **args.callback_args)
cost = GeneralizedCost(costfunc=CrossEntropyBinary())
model.fit(tain, optimizer=opt, num_epochs=args.epochs, cost=cost,
          callbacks=callbacks)
# VGG-16-style conv trunk. The first four convs carry name="skip" —
# presumably so a later weight-loading/matching step treats them specially
# (confirm against where these layer names are consumed).
init1_vgg = Xavier(local=True)
relu = Rectlin()
conv_params = {
    'strides': 1,
    'padding': 1,
    'init': init1_vgg,
    'bias': Constant(0),
    'activation': relu
}

# Set up the model layers
layers = []
# set up 3x3 conv stacks with different feature map sizes
layers.append(Conv((3, 3, 64), name="skip", **conv_params))
layers.append(Conv((3, 3, 64), name="skip", **conv_params))
layers.append(Pooling(2, strides=2))
layers.append(Conv((3, 3, 128), name="skip", **conv_params))
layers.append(Conv((3, 3, 128), name="skip", **conv_params))
layers.append(Pooling(2, strides=2))
layers.append(Conv((3, 3, 256), **conv_params))
layers.append(Conv((3, 3, 256), **conv_params))
layers.append(Conv((3, 3, 256), **conv_params))
layers.append(Pooling(2, strides=2))
layers.append(Conv((3, 3, 512), **conv_params))
layers.append(Conv((3, 3, 512), **conv_params))
layers.append(Conv((3, 3, 512), **conv_params))
layers.append(Pooling(2, strides=2))
layers.append(Conv((3, 3, 512), **conv_params))
layers.append(Conv((3, 3, 512), **conv_params))
# Genre-classification data loaders and a conv + deep-BiRNN topology
# (chunk is cut off before the layer list is closed).
val_params = AudioParams(**common_params)
common = dict(target_size=1, nclasses=10, repo_dir=args.data_dir)
train = DataLoader(set_name='genres-train', media_params=train_params,
                   index_file=train_idx, shuffle=True, **common)
val = DataLoader(set_name='genres-val', media_params=val_params,
                 index_file=val_idx, shuffle=False, **common)

init = Gaussian(scale=0.01)
layers = [
    Conv((7, 7, 32), init=init, activation=Rectlin(),
         strides=dict(str_h=2, str_w=4)),
    Pooling(2, strides=2),
    Conv((5, 5, 64), init=init, batch_norm=True, activation=Rectlin(),
         strides=dict(str_h=1, str_w=2)),
    DeepBiRNN(128, init=GlorotUniform(), batch_norm=True,
              activation=Rectlin(), reset_cells=True, depth=3),
    RecurrentMean(),
    Affine(nout=common['nclasses'], init=init, activation=Softmax())
batch_norm=False, padding=1, activation=Logistic(shortcut=False))
]

# discriminator using convolution layers
lrelu = Rectlin(slope=0.1)  # leaky relu for discriminator
conv = dict(init=init, batch_norm=True, activation=lrelu)
convp1 = dict(init=init, batch_norm=True, activation=lrelu, padding=1)
convp1s2 = dict(init=init, batch_norm=True, activation=lrelu, padding=1, strides=2)

D_layers = [
    Conv((3, 3, 96), name="D11", **convp1),
    Conv((3, 3, 96), name="D12", **convp1s2),   # stride-2 conv downsamples
    Conv((3, 3, 192), name="D21", **convp1),
    Conv((3, 3, 192), name="D22", **convp1s2),
    Conv((3, 3, 192), name="D31", **convp1),
    Conv((1, 1, 16), name="D32", **conv),
    # single-channel sigmoid output: real/fake score
    Conv((7, 7, 1), name="D_out", init=init,
         batch_norm=False, activation=Logistic(shortcut=False))
]

layers = GenerativeAdversarial(generator=Sequential(G_layers, name="Generator"),
                               discriminator=Sequential(D_layers,
def layers(self):
    """Assemble the conv/pool stack for this model.

    Spatial size halves at each pooling step (128 -> 64 -> 32 -> 16 -> 8)
    before global average pooling and the classifier head.
    """
    use_bn = True
    topology = []
    # input 128
    topology.append(Conv((7, 7, 96), init=Kaiming(), bias=Constant(0),
                         activation=Explin(), padding=3, strides=1))
    topology.append(Pooling(3, strides=2, padding=1))
    # 64
    topology.append(Conv((7, 7, 128), init=Kaiming(), activation=Explin(),
                         batch_norm=use_bn, padding=3, strides=1))
    topology.append(Pooling(3, strides=2, padding=1))
    # 32
    topology.append(Conv((5, 5, 256), init=Kaiming(), activation=Explin(),
                         batch_norm=use_bn, padding=2, strides=1))
    topology.append(Pooling(3, strides=2, padding=1))
    # 16: three identical 3x3 conv layers
    for _ in range(3):
        topology.append(Conv((3, 3, 384), init=Kaiming(), activation=Explin(),
                             batch_norm=use_bn, padding=1, strides=1))
    topology.append(Pooling(3, strides=2, padding=1))
    # 8
    topology.append(Conv((3, 3, 8192), init=Kaiming(), activation=Explin(),
                         batch_norm=use_bn, padding=1, strides=1))
    topology.append(Pooling('all', op='avg'))
    # classifier head: softmax for mutually-exclusive classes, else logistic
    final_act = Softmax() if self.use_softmax else Logistic(shortcut=True)
    topology.append(Affine(nout=self.noutputs, init=Kaiming(),
                           bias=Constant(0), activation=final_act))
    return topology
#plt.savefigure('data_img.png') # setup weight initialization function init = Gaussian(scale=0.01) # discriminiator using convolution layers lrelu = Rectlin(slope=0.1) # leaky relu for discriminator # sigmoid = Logistic() # sigmoid activation function conv1 = dict(init=init, batch_norm=False, activation=lrelu, bias=init) conv2 = dict(init=init, batch_norm=False, activation=lrelu, padding=2, bias=init) conv3 = dict(init=init, batch_norm=False, activation=lrelu, padding=1, bias=init) b1 = BranchNode("b1") b2 = BranchNode("b2") branch1 = [ b1, Conv((5, 5, 5, 32), **conv1), Dropout(keep = 0.8), Conv((5, 5, 5, 8), **conv2), BatchNorm(), Dropout(keep = 0.8), Conv((5, 5, 5, 8), **conv2), BatchNorm(), Dropout(keep = 0.8), Conv((5, 5, 5, 8), **conv3), BatchNorm(), Dropout(keep = 0.8), Pooling((2, 2, 2)), Affine(1024, init=init, activation=lrelu), BatchNorm(), Affine(1024, init=init, activation=lrelu), BatchNorm(),
def layers(self):
    """Assemble the conv/pool stack for this (smaller) model variant.

    Spatial size halves at each pooling step (128 -> 64 -> 32 -> 16 -> 8)
    before global average pooling and the classifier head.
    """
    use_bn = True
    topology = []
    # input 128
    topology.append(Conv((7, 7, 64), init=Kaiming(), bias=Constant(0),
                         activation=Explin(), padding=3, strides=1))
    topology.append(Pooling(3, strides=2, padding=1))
    # 64
    for _ in range(2):
        topology.append(Conv((3, 3, 96), init=Kaiming(), activation=Explin(),
                             batch_norm=use_bn, padding=1, strides=1))
    topology.append(Pooling(3, strides=2, padding=1))
    # 32
    for _ in range(2):
        topology.append(Conv((3, 3, 192), init=Kaiming(), activation=Explin(),
                             batch_norm=use_bn, padding=1, strides=1))
    topology.append(Pooling(3, strides=2, padding=1))
    # 16: three 3x3 convs. A 4th may have been used for the vgg3pool64all
    # run, but it does not fit for 6-fold training, so it stays disabled:
    # Conv((3, 3, 384), init=Kaiming(), activation=Explin(),
    #      batch_norm=use_bn, padding=1, strides=1)
    for _ in range(3):
        topology.append(Conv((3, 3, 384), init=Kaiming(), activation=Explin(),
                             batch_norm=use_bn, padding=1, strides=1))
    topology.append(Pooling(3, strides=2, padding=1))
    # 8
    topology.append(Conv((3, 3, 6144), init=Kaiming(), activation=Explin(),
                         batch_norm=use_bn, padding=1, strides=1))
    topology.append(Pooling('all', op='avg'))
    # classifier head: softmax for mutually-exclusive classes, else logistic
    final_act = Softmax() if self.use_softmax else Logistic(shortcut=True)
    topology.append(Affine(nout=self.noutputs, init=Kaiming(),
                           bias=Constant(0), activation=final_act))
    return topology
init_uni = GlorotUniform() # The parameters below are straight out of [Springenberg2014] opt_gdm = GradientDescentMomentum(learning_rate=0.01, schedule=Schedule(step_config=[10], change=0.1), momentum_coef=0.9, wdecay=.0005) # set up model layers layers = [] layers.append(DataTransform(transform=Normalizer(divisor=128.))) layers.append( Conv((11, 11, 96), init=init_uni, activation=relu, strides=4, padding=1)) layers.append(Conv((1, 1, 96), init=init_uni, activation=relu, strides=1)) layers.append( Conv((3, 3, 96), init=init_uni, activation=relu, strides=2, padding=1)) # 54->27 layers.append(Conv((5, 5, 256), init=init_uni, activation=relu, strides=1)) # 27->23 layers.append(Conv((1, 1, 256), init=init_uni, activation=relu, strides=1)) layers.append( Conv((3, 3, 256), init=init_uni, activation=relu, strides=2, padding=1)) # 23->12 layers.append( Conv((3, 3, 384), init=init_uni, activation=relu, strides=1, padding=1)) layers.append(Conv((1, 1, 384), init=init_uni, activation=relu, strides=1))
# Classifier over variable-width images; conv channel count doubles per
# block, capped at 1024 (chunk ends mid fit() call).
parser.add_argument('-iw', '--image_width', default=384, help='image width')
args = parser.parse_args()

imwidth = int(args.image_width)
train = ClassifierLoader(repo_dir=args.data_dir, inner_size=imwidth,
                         set_name='train', do_transforms=False)
train.init_batch_provider()

init = Gaussian(scale=0.01)
opt = Adadelta(decay=0.9)
common = dict(init=init, batch_norm=True, activation=Rectlin())
layers = []
nchan = 64
layers.append(Conv((2, 2, nchan), strides=2, **common))
for idx in range(5):
    if nchan > 1024:
        nchan = 1024  # cap the number of feature maps
    layers.append(Conv((3, 3, nchan), strides=1, **common))
    layers.append(Pooling(2, strides=2))
    nchan *= 2
#layers.append(Affine(nout=4096, init=init, activation=Rectlin(), batch_norm=True))
layers.append(DropoutBinary(keep=0.2))
layers.append(Affine(nout=447, init=init, activation=Softmax()))

cost = GeneralizedCost(costfunc=CrossEntropyMulti())
mlp = Model(layers=layers)
callbacks = Callbacks(mlp, train, **args.callback_args)
mlp.fit(train, optimizer=opt,
# drop LR by 1/250**(1/3) at beginning of epochs 23, 45, 66 weight_sched = Schedule([22, 44, 65], (1 / 250.)**(1 / 3.)) opt_gdm = GradientDescentMomentum(0.01, 0.9, wdecay=0.0005, schedule=weight_sched) # drop bias weights by 1/10 at the beginning of epoch 45. opt_biases = GradientDescentMomentum(0.02, 0.9, schedule=Schedule([44], 0.1)) # Set up the model layers layers = [] layers.append( Conv((11, 11, 64), strides=4, padding=3, init=init1, bias=Constant(0), activation=relu)) layers.append(Pooling(3, strides=2)) layers.append( Conv((5, 5, 192), padding=2, init=init1, bias=Constant(1), activation=relu)) layers.append(Pooling(3, strides=2)) layers.append( Conv((3, 3, 384), padding=1, init=init2, bias=Constant(0), activation=relu)) layers.append( Conv((3, 3, 256), padding=1, init=init2, bias=Constant(1), activation=relu)) layers.append( Conv((3, 3, 256), padding=1, init=init2, bias=Constant(1),
change=float(
    assignments.get("momentum_step_change"))),
)

relu = Rectlin()
# All-CNN style parameter bundles: plain, padded, and padded+strided convs
conv = dict(init=init_uni, batch_norm=False, activation=relu)
convp1 = dict(init=init_uni, batch_norm=False, activation=relu, padding=1)
convp1s2 = dict(init=init_uni, batch_norm=False, activation=relu,
                padding=1, strides=2)

layers = [
    Dropout(keep=.8),  # input dropout
    Conv((3, 3, 96), **convp1),
    Conv((3, 3, 96), **convp1),
    Conv((3, 3, 96), **convp1s2),  # stride-2 conv downsamples, no pooling
    Dropout(keep=.5),
    Conv((3, 3, 192), **convp1),
    Conv((3, 3, 192), **convp1),
    Conv((3, 3, 192), **convp1s2),
    Dropout(keep=.5),
    Conv((3, 3, 192), **convp1),
    Conv((1, 1, 192), **conv),
    Conv((1, 1, 16), **conv),
    Pooling(8, op="avg"),
    Activation(Softmax())
]
cost = GeneralizedCost(costfunc=CrossEntropyMulti())
def build_model(dataset, frcn_rois_per_img, train_pre_nms_N=12000,
                train_post_nms_N=2000, test_pre_nms_N=6000,
                test_post_nms_N=300, inference=False):
    """
    Returns the Faster-RCNN model. For inference, also returns a reference to the
    proposal layer.

    Faster-RCNN contains three modules: VGG, the Region Proposal Network (RPN),
    and the Classification Network (ROI-pooling + Fully Connected layers), organized
    as a tree. Tree has 4 branches:

    VGG -> b1 -> Conv (3x3) -> b2 -> Conv (1x1) -> CrossEntropyMulti (objectness label)
                               b2 -> Conv (1x1) -> SmoothL1Loss (bounding box targets)
           b1 -> PropLayer -> ROI -> Affine -> Affine -> b3 -> Affine -> CrossEntropyMulti
                                                         b3 -> Affine -> SmoothL1Loss

    When the model is constructed for inference, several elements are different:
    - The number of regions to keep before and after non-max suppression is
      (12000, 2000) for training and (6000, 300) for inference (see the
      default arguments and the pre/post_nms_N selection below).
    - The out_shape of the proposalLayer of the network is equal to post_nms_N (number of rois
      to keep after performing nms). This is configured by passing the inference flag to the
      proposalLayer constructor.

    Arguments:
        dataset (objectlocalization): Dataset object.
        frcn_rois_per_img (int): Number of ROIs per image considered by the classification network.
        inference (bool): Construct the model for inference. Default is False.

    Returns:
        model (Model): Faster-RCNN model.
        proposalLayer (proposalLayer): Reference to proposalLayer in the model.
                                       Returned only for inference=True.
    """
    num_classes = dataset.num_classes

    # define the branch points
    b1 = BranchNode(name="conv_branch")
    b2 = BranchNode(name="rpn_branch")
    b3 = BranchNode(name="roi_branch")

    # define VGG
    VGG = util.add_vgg_layers()

    # define RPN
    rpn_init = dict(strides=1, init=Gaussian(scale=0.01), bias=Constant(0))
    # these references are passed to the ProposalLayer.
    RPN_3x3 = Conv((3, 3, 512), activation=Rectlin(), padding=1, **rpn_init)
    # 18 = 2 objectness scores x 9 anchors; 36 = 4 box coords x 9 anchors
    RPN_1x1_obj = Conv((1, 1, 18), activation=PixelwiseSoftmax(c=2),
                       padding=0, **rpn_init)
    RPN_1x1_bbox = Conv((1, 1, 36), activation=Identity(),
                        padding=0, **rpn_init)

    # inference uses different network settings
    if not inference:
        pre_nms_N = train_pre_nms_N    # default 12000
        post_nms_N = train_post_nms_N  # default 2000
    else:
        pre_nms_N = test_pre_nms_N     # default 6000
        post_nms_N = test_post_nms_N   # default 300

    proposalLayer = ProposalLayer([RPN_1x1_obj, RPN_1x1_bbox],
                                  dataset, pre_nms_N=pre_nms_N,
                                  post_nms_N=post_nms_N,
                                  num_rois=frcn_rois_per_img,
                                  inference=inference)

    # define ROI classification network
    ROI = [
        proposalLayer,
        RoiPooling(HW=(7, 7)),
        Affine(nout=4096, init=Gaussian(scale=0.005),
               bias=Constant(.1), activation=Rectlin()),
        Dropout(keep=0.5),
        Affine(nout=4096, init=Gaussian(scale=0.005),
               bias=Constant(.1), activation=Rectlin()),
        Dropout(keep=0.5)
    ]

    ROI_category = Affine(nout=num_classes, init=Gaussian(scale=0.01),
                          bias=Constant(0), activation=Softmax())
    # 4 box-regression outputs per class
    ROI_bbox = Affine(nout=4 * num_classes, init=Gaussian(scale=0.001),
                      bias=Constant(0), activation=Identity())

    # build the model
    # the four branches of the tree mirror the branches listed above
    frcn_tree = Tree([ROI + [b3, ROI_category],
                      [b3, ROI_bbox]])

    model = Model(layers=Tree([
        VGG + [b1, RPN_3x3, b2, RPN_1x1_obj],
        [b2, RPN_1x1_bbox],
        [b1] + [frcn_tree],
    ]))

    if inference:
        return (model, proposalLayer)
    else:
        return model
args = parser.parse_args() # Load dataset (X_train, y_train), (X_test, y_test), nclass = load_mnist(path=args.data_dir) # Set input and target to X_train train = ArrayIterator(X_train, lshape=(1, 28, 28)) # Initialize the weights and the learning rule init_uni = Uniform(low=-0.1, high=0.1) opt_gdm = GradientDescentMomentum(learning_rate=0.001, momentum_coef=0.9) # Strided conv autoencoder bn = False layers = [ Conv((4, 4, 8), init=init_uni, activation=Rectlin(), batch_norm=bn), Pooling(2), Conv((4, 4, 32), init=init_uni, activation=Rectlin(), batch_norm=bn), Pooling(2), Deconv(fshape=(4, 4, 8), init=init_uni, activation=Rectlin(), batch_norm=bn), Deconv(fshape=(3, 3, 8), init=init_uni, activation=Rectlin(), strides=2, batch_norm=bn), Deconv(fshape=(2, 2, 1), init=init_uni, strides=2, padding=1) ]
flip_enable=True, rot90_enable=True,
                                    crop_enable=False, border_size=5)
# validation set: same HDF5 source, no augmentation
valid_set = HDF5IteratorOneHot(
    '/mnt/data/medical/luna16/luna16_roi_subset{}_augmented.h5'.format(SUBSET),
    flip_enable=False, rot90_enable=False, crop_enable=False, border_size=5)

print('Using subset{}'.format(SUBSET))

init_uni = Kaiming()
relu = Rectlin()
bn = True
convp1 = dict(init=init_uni, batch_norm=bn, activation=relu, padding=1)

# small conv net ending in global average pooling + 2-way softmax
layers = [
    Conv((5, 5, 24), **convp1),
    Pooling(2, op='max'),
    Conv((3, 3, 32), **convp1),
    Pooling(2, op='max'),
    Conv((3, 3, 48), **convp1),
    Pooling('all', op='avg'),
    Affine(512, init=init_uni, batch_norm=True, activation=relu),
    Affine(2, init=init_uni, activation=Softmax())
]

cost = GeneralizedCost(costfunc=CrossEntropyBinary())
lunaModel = Model(layers=layers)
modelFileName = 'LUNA16_CADIMI_subset{}.prm'.format(SUBSET)
# Build a VGG variant selected by the (externally defined) VGG string:
# deeper stacks get a 3rd conv for 'D' and a 4th for 'E'.
train.init_batch_provider()
test.init_batch_provider()

init1 = GlorotUniform()
relu = Rectlin()
common_params = dict(init=init1, activation=Rectlin(),
                     batch_norm=use_batch_norm, bias=biases)
conv_params = dict(padding=1, **common_params)

# Set up the model layers, using 3x3 conv stacks with different feature map sizes
layers = []
for nofm in [64, 128, 256, 512, 512]:
    layers.append(Conv((3, 3, nofm), **conv_params))
    layers.append(Conv((3, 3, nofm), **conv_params))
    if nofm > 128:
        # variants 'D' and 'E' deepen the 256/512 stacks
        if VGG in ('D', 'E'):
            layers.append(Conv((3, 3, nofm), **conv_params))
        if VGG == 'E':
            layers.append(Conv((3, 3, nofm), **conv_params))
    layers.append(Pooling(3, strides=2))

layers.append(Affine(nout=4096, **common_params))
layers.append(Dropout(keep=0.5))
layers.append(Affine(nout=4096, **common_params))
layers.append(Dropout(keep=0.5))
layers.append(Affine(nout=1000, init=init1, bias=Constant(0),
                     activation=Softmax()))
# Small CIFAR-style conv net. fp16 runs divide the learning rate and scale
# the cost by cost_scale — presumably loss scaling to keep fp16 gradients
# from underflowing (confirm against where cost_scale is defined).
train = ArrayIterator(X_train, y_train, nclass=nclass, lshape=(3, 32, 32))
test = ArrayIterator(X_test, y_test, nclass=nclass, lshape=(3, 32, 32))

init_uni = Uniform(low=-0.1, high=0.1)
if args.datatype in [np.float32, np.float64]:
    opt_gdm = GradientDescentMomentum(learning_rate=0.01,
                                      momentum_coef=0.9,
                                      stochastic_round=args.rounding)
elif args.datatype in [np.float16]:
    opt_gdm = GradientDescentMomentum(learning_rate=0.01 / cost_scale,
                                      momentum_coef=0.9,
                                      stochastic_round=args.rounding)

bn = True
layers = [
    Conv((5, 5, 16), init=init_uni, activation=Rectlin(), batch_norm=bn),
    Pooling((2, 2)),
    Conv((5, 5, 32), init=init_uni, activation=Rectlin(), batch_norm=bn),
    Pooling((2, 2)),
    Affine(nout=500, init=init_uni, activation=Rectlin(), batch_norm=bn),
    Affine(nout=10, init=init_uni, activation=Softmax())
]

if args.datatype in [np.float32, np.float64]:
    cost = GeneralizedCost(costfunc=CrossEntropyMulti())
elif args.datatype in [np.float16]:
    cost = GeneralizedCost(costfunc=CrossEntropyMulti(scale=cost_scale))

model = Model(layers=layers)

# configure callbacks
Conv(**conv_params(1, nfm * 4, relu=False, batch_norm=False)) ] sidepath = [ Conv(**conv_params(1, nfm * 4, stride=2, relu=False, batch_norm=False)) ] module.append(MergeSum([sidepath, mainpath])) return module # Structure of the deep residual part of the network: # args.depth modules of 2 convolutional layers each at feature map depths of 16, 32, 64 nfms = [2**(stage + 4) for stage in sorted(range(3) * args.depth)] strides = [1 if cur == prev else 2 for cur, prev in zip(nfms[1:], nfms[:-1])] # Now construct the network layers = [Conv(**conv_params(3, 16))] layers.append(module_s1(nfms[0], True)) for nfm, stride in zip(nfms[1:], strides): res_module = module_s1(nfm) if stride == 1 else module_s2(nfm) layers.append(res_module) layers.append(BatchNorm()) layers.append(Activation(Rectlin())) layers.append(Pooling('all', op='avg')) layers.append( Affine(10, init=Kaiming(local=False), batch_norm=True, activation=Softmax())) model = Model(layers=layers)
# Music-genre classifier: audio clips -> conv front-end -> deep BiRNN ->
# temporal mean -> 10-way softmax, trained end-to-end with Adagrad.
parser = NeonArgparser(__doc__)
args = parser.parse_args()

train_idx, valid_idx = create_index_files(args.data_dir)

# 22.05 kHz sampling; clip/frame durations presumably in ms — confirm
# against the AudioParams documentation.
common_params = dict(sampling_freq=22050, clip_duration=16000,
                     frame_duration=16)
train_params = AudioParams(**common_params)
valid_params = AudioParams(**common_params)
common = dict(target_size=1, nclasses=10, repo_dir=args.data_dir)
train = DataLoader(set_name='music-train', media_params=train_params,
                   index_file=train_idx, shuffle=True, **common)
valid = DataLoader(set_name='music-valid', media_params=valid_params,
                   index_file=valid_idx, shuffle=False, **common)

init = Gaussian(scale=0.01)
layers = [Conv((2, 2, 4), init=init, activation=Rectlin(),
               strides=dict(str_h=2, str_w=4)),
          Pooling(2, strides=2),
          Conv((3, 3, 4), init=init, batch_norm=True, activation=Rectlin(),
               strides=dict(str_h=1, str_w=2)),
          DeepBiRNN(128, init=GlorotUniform(), batch_norm=True,
                    activation=Rectlin(), reset_cells=True, depth=3),
          RecurrentMean(),
          Affine(nout=common['nclasses'], init=init, activation=Softmax())]

model = Model(layers=layers)
opt = Adagrad(learning_rate=0.01, gradient_clip_value=15)
metric = Misclassification()
callbacks = Callbacks(model, eval_set=valid, metric=metric,
                      **args.callback_args)
cost = GeneralizedCost(costfunc=CrossEntropyMulti())
model.fit(train, optimizer=opt, num_epochs=args.epochs, cost=cost,
          callbacks=callbacks)
def test_multi_optimizer(backend_default_mkl):
    """
    A test for MultiOptimizer.

    Builds conv/affine/LSTM/GRU layers, maps layer class names to distinct
    optimizers through MultiOptimizer, and checks that optimize() routed each
    layer to the optimizer registered for its class name.
    """
    opt_gdm = GradientDescentMomentum(learning_rate=0.001,
                                      momentum_coef=0.9, wdecay=0.005)
    opt_ada = Adadelta()
    opt_adam = Adam()
    opt_rms = RMSProp()
    opt_rms_1 = RMSProp(gradient_clip_value=5)
    init_one = Gaussian(scale=0.01)

    l1 = Conv((11, 11, 64), strides=4, padding=3, init=init_one,
              bias=Constant(0), activation=Rectlin())
    l2 = Affine(nout=4096, init=init_one, bias=Constant(1),
                activation=Rectlin())
    l3 = LSTM(output_size=1000, init=init_one, activation=Logistic(),
              gate_activation=Tanh())
    l4 = GRU(output_size=100, init=init_one, activation=Logistic(),
             gate_activation=Tanh())
    layers = [l1, l2, l3, l4]

    # compound layers (e.g. Conv) are list-like containers; flatten them
    layer_list = []
    for layer in layers:
        if isinstance(layer, list):
            layer_list.extend(layer)
        else:
            layer_list.append(layer)

    for l in layer_list:
        l.configure(in_obj=(16, 28, 28))
        l.allocate()

    # separate layer_list into two, the last two recurrent layers and the rest
    layer_list1, layer_list2 = layer_list[:-2], layer_list[-2:]

    opt = MultiOptimizer({
        'default': opt_gdm,
        'Bias': opt_ada,
        'Convolution': opt_adam,
        'Convolution_bias': opt_adam,
        'Linear': opt_rms,
        'LSTM': opt_rms_1,
        'GRU': opt_rms_1
    })

    layers_to_optimize1 = [l for l in layer_list1
                           if isinstance(l, ParameterLayer)]
    layers_to_optimize2 = [l for l in layer_list2
                           if isinstance(l, ParameterLayer)]

    opt.optimize(layers_to_optimize1, 0)
    # temp roll back conv_bias (branch disabled via `False and ...`)
    # BUG FIX: the original compared __class__.__name__ with `is`, an identity
    # check against a string literal that only works by accident of CPython
    # string interning (and warns on modern CPython). Use equality instead.
    if False and l1[0].be.is_mkl():
        assert opt.map_list[opt_adam][0].__class__.__name__ == 'Convolution_bias'
    else:
        assert opt.map_list[opt_adam][0].__class__.__name__ == 'Convolution'
    assert opt.map_list[opt_ada][0].__class__.__name__ == 'Bias'
    assert opt.map_list[opt_rms][0].__class__.__name__ == 'Linear'

    opt.optimize(layers_to_optimize2, 0)
    assert opt.map_list[opt_rms_1][0].__class__.__name__ == 'LSTM'
    assert opt.map_list[opt_rms_1][1].__class__.__name__ == 'GRU'
def mergesum_test_config(be, modfunc, use_stride=1):
    """Compare a MergeSum residual module against two explicit reference
    pathways, on both fprop and bprop."""
    # module under test: stem conv followed by the merge module
    l1 = Conv(**conv_params(3, 16))
    neon_layer = modfunc(16, use_stride)
    inshape = (16, 32, 32)
    insize = np.prod(inshape)
    inpa = np.random.random((insize, batch_size))
    neon_seq = Sequential([l1] + neon_layer)
    neon_seq.configure(inshape)
    inp = be.array(inpa)
    neon_seq.allocate()
    # neon_layer.layers[0].prev_layer = True
    neon_seq.allocate_deltas()
    neon_out = neon_seq.fprop(inp).get()

    # Now make the reference pathways:
    # each branch of the merge runs as its own Sequential, with the stem
    # conv weights copied so both references see identical parameters
    p1, p2 = module_factory_copy(neon_layer, modfunc, 16, use_stride)
    l11 = Conv(**conv_params(3, 16))
    l12 = Conv(**conv_params(3, 16))
    for ll in (l11, l12):
        for lcopy, lref in zip(ll, l1):
            if lcopy.has_params:
                lcopy.set_params(lref.get_params_serialize())
    path1 = Sequential([l11] + p1)
    path2 = Sequential([l12] + p2)
    for ll in (path1, path2):
        ll.configure(inshape)
        ll.allocate()
        ll.allocate_deltas()

    # reference fprop: elementwise sum of the branches through ReLU
    o1 = path1.fprop(inp)
    o2 = path2.fprop(inp)
    neon_out_ref = be.empty_like(o1)
    neon_out_ref[:] = be.maximum(o1 + o2, 0)

    # need to have bsum false for this test to be valid
    assert allclose_with_out(neon_out_ref.get(), neon_out, rtol=0)

    # backprop a random error through the merged trunk (last two layers:
    # the activation and the merge itself), skipping the stem conv
    erra = np.random.random(neon_out.shape)
    err = be.array(erra)
    ebr = neon_seq.layers[-1].bprop(err)
    ebr = neon_seq.layers[-2].bprop(ebr)
    trunk_neon = ebr.get()

    # reference bprop: gate the error by the ReLU mask, then run each branch
    err = be.array(erra)
    err[:] = be.greater(neon_out_ref, 0) * err

    # skip the copied stem conv layers at the front of each reference path
    pstart = len(l1)
    eb1 = err
    for l in reversed(path1.layers[pstart:]):
        eb1 = l.bprop(eb1)
    eb2 = err
    for l in reversed(path2.layers[pstart:]):
        eb2 = l.bprop(eb2)
    err_ref = be.empty_like(eb1)
    # merged delta is the sum of the branch deltas
    err_ref[:] = eb1 + eb2
    assert allclose_with_out(err_ref.get(), trunk_neon, rtol=0)
scale_range=32, shuffle=True, **options)
valid_set = ImageLoader(set_name='validation', scale_range=32,
                        do_transforms=False, **options)

# define model
nfilters = [96, 192, 256]
init_w = Gaussian(scale=0.01)
relu = Rectlin()
common_params = dict(init=init_w, activation=relu)
convp1 = dict(padding=1, **common_params)

# three conv stacks with pooling + dropout between them
# (chunk is cut off inside the final Affine call)
layers = [
    Conv((3, 3, nfilters[0]), bias=Constant(0.1), **convp1),
    Conv((3, 3, nfilters[0]), bias=Constant(0.1), **convp1),
    Pooling(3, strides=2, padding=1),  # 32 -> 16
    Dropout(keep=0.7),
    Conv((3, 3, nfilters[1]), bias=Constant(0.1), **convp1),
    Conv((3, 3, nfilters[1]), bias=Constant(0.1), **convp1),
    Pooling(3, strides=2, padding=1),  # 16 -> 8
    Dropout(keep=0.8),
    Conv((3, 3, nfilters[2]), bias=Constant(0.1), **convp1),
    Conv((3, 3, nfilters[2]), bias=Constant(0.1), **convp1),
    Conv((3, 3, nfilters[2]), bias=Constant(0.1), **convp1),
    Pooling(3, strides=2, padding=1),  # 8 -> 4
    Dropout(keep=0.7),
    Affine(nout=10, bias=Constant(0.1), activation=Softmax(),
be = gen_backend(backend='gpu', batch_size=128, datatype=np.float32) # setup a dataset iterator mnist = MNIST(path='../dataset/mnist') (X_train, y_train), (X_test, y_test), nclass = mnist.load_data() train_set = ArrayIterator(X_train, y_train, nclass=nclass, lshape=(1, 28, 28)) valid_set = ArrayIterator(X_test, y_test, nclass=nclass, lshape=(1, 28, 28)) # define model nfilters = [20, 50, 500] # nfilters = [24, 56, 500] init_w = Gaussian(scale=0.01) relu = Rectlin() common_params = dict(init=init_w, activation=relu) layers = [ Conv((5, 5, nfilters[0]), bias=Constant(0.1), padding=0, **common_params), Pooling(2, strides=2, padding=0), Conv((5, 5, nfilters[1]), bias=Constant(0.1), padding=0, **common_params), Pooling(2, strides=2, padding=0), Affine(nout=nfilters[2], bias=Constant(0.1), **common_params), Affine(nout=10, bias=Constant(0.1), activation=Softmax(), init=Gaussian(scale=0.01)) ] model = Model(layers=layers) cost = GeneralizedCost(costfunc=CrossEntropyMulti()) model.initialize(train_set, cost) model.load_params('models/mnist/mnist_cnn.pkl', load_states=False)
# ImageNet loaders and an AlexNet-style topology
# (chunk is cut off inside the layer list).
args = parser.parse_args()

if args.test_only:
    if args.model_file is None:
        raise ValueError('To test model, trained weights need to be provided')

# setup data provider
img_set_options = dict(repo_dir=args.data_dir, inner_size=224,
                       dtype=args.datatype, subset_pct=args.subset_pct)
train = ImageLoader(set_name='train', scale_range=(256, 384),
                    shuffle=True, **img_set_options)
test = ImageLoader(set_name='validation', scale_range=(256, 256),
                   do_transforms=False, **img_set_options)

layers = [Conv((11, 11, 64), init=Gaussian(scale=0.01), bias=Constant(0),
               activation=Rectlin(), padding=3, strides=4),
          Pooling(3, strides=2),
          Conv((5, 5, 192), init=Gaussian(scale=0.01), bias=Constant(1),
               activation=Rectlin(), padding=2),
          Pooling(3, strides=2),
          Conv((3, 3, 384), init=Gaussian(scale=0.03), bias=Constant(0),
               activation=Rectlin(), padding=1),
          Conv((3, 3, 256), init=Gaussian(scale=0.03), bias=Constant(1),
               activation=Rectlin(), padding=1),
          Conv((3, 3, 256), init=Gaussian(scale=0.03), bias=Constant(1),
               activation=Rectlin(), padding=1),
          Pooling(3, strides=2),
          Affine(nout=4096, init=Gaussian(scale=0.01), bias=Constant(1),
                 activation=Rectlin()),
          Dropout(keep=0.5),
          Affine(nout=4096, init=Gaussian(scale=0.01), bias=Constant(1),
                 activation=Rectlin()),
          Dropout(keep=0.5),
def test_model_serialize(backend):
    """
    Round-trip a branching MLP through serialize()/load_weights() and verify
    that inference outputs, layer params and optimizer states all survive.
    """
    (X_train, y_train), (X_test, y_test), nclass = load_mnist()
    train_set = DataIterator([X_train, X_train], y_train, nclass=nclass,
                             lshape=(1, 28, 28))

    init_norm = Gaussian(loc=0.0, scale=0.01)

    # initialize model: two parallel paths merged by concatenation
    path1 = [Conv((5, 5, 16), init=init_norm, bias=Constant(0),
                  activation=Rectlin()),
             Pooling(2),
             Affine(nout=20, init=init_norm, bias=init_norm,
                    activation=Rectlin())]
    path2 = [Dropout(keep=0.5),
             Affine(nout=20, init=init_norm, bias=init_norm,
                    activation=Rectlin())]
    layers = [MergeConcat([path1, path2]),
              Affine(nout=20, init=init_norm, bias=init_norm,
                     activation=Rectlin()),
              BatchNorm(),
              Affine(nout=10, init=init_norm,
                     activation=Logistic(shortcut=True))]

    tmp_save = 'test_model_serialize_tmp_save.pickle'
    mlp = Model(layers=layers)
    mlp.optimizer = GradientDescentMomentum(learning_rate=0.1,
                                            momentum_coef=0.9)
    mlp.cost = GeneralizedCost(costfunc=CrossEntropyBinary())

    n_test = 3
    num_epochs = 3

    # Train model for num_epochs and n_test batches
    for epoch in range(num_epochs):
        for i, (x, t) in enumerate(train_set):
            x = mlp.fprop(x)
            delta = mlp.cost.get_errors(x, t)
            mlp.bprop(delta)
            mlp.optimizer.optimize(mlp.layers_to_optimize, epoch=epoch)
            if i > n_test:
                break

    # Get expected outputs of n_test batches and states of all layers
    outputs_exp = []
    pdicts_exp = [l.get_params_serialize() for l in mlp.layers_to_optimize]
    for i, (x, t) in enumerate(train_set):
        outputs_exp.append(mlp.fprop(x, inference=True))
        if i > n_test:
            break

    # Serialize model
    save_obj(mlp.serialize(keep_states=True), tmp_save)

    # Load model into a fresh Model built over the same layer objects
    mlp = Model(layers=layers)
    mlp.load_weights(tmp_save)

    outputs = []
    pdicts = [l.get_params_serialize() for l in mlp.layers_to_optimize]
    for i, (x, t) in enumerate(train_set):
        outputs.append(mlp.fprop(x, inference=True))
        if i > n_test:
            break

    # Check outputs, states, and params are the same
    for output, output_exp in zip(outputs, outputs_exp):
        assert np.allclose(output.get(), output_exp.get())

    for pd, pd_exp in zip(pdicts, pdicts_exp):
        for s, s_e in zip(pd['states'], pd_exp['states']):
            if isinstance(s, list):  # this is the batch norm case
                for _s, _s_e in zip(s, s_e):
                    assert np.allclose(_s, _s_e)
            else:
                assert np.allclose(s, s_e)
        for p, p_e in zip(pd['params'], pd_exp['params']):
            if isinstance(p, list):  # this is the batch norm case
                for _p, _p_e in zip(p, p_e):
                    assert np.allclose(_p, _p_e)
            else:
                assert np.allclose(p, p_e)

    # clean up the temporary serialization file
    os.remove(tmp_save)
if args.bottleneck:
    # bottleneck residual: 1x1 reduce -> 3x3 -> 1x1 expand; the final conv
    # has no relu — the activation is applied after the merge below
    mainpath = [Conv(**conv_params(1, nfm, stride)),
                Conv(**conv_params(3, nfm)),
                Conv(**conv_params(1, nfm_out, relu=False))]
else:
    # basic residual: two 3x3 convs
    mainpath = [Conv(**conv_params(3, nfm, stride)),
                Conv(**conv_params(3, nfm, relu=False))]
return [MergeSum([mainpath, sidepath]),
        Activation(Rectlin())]


# stem: strided 7x7 conv plus max pooling
layers = [Conv(**conv_params(7, 64, strides=2)),
          Pooling(3, strides=2)]

# Structure of the deep residual part of the network:
# args.depth modules of 2 convolutional layers each at feature map depths
# of 64, 128, 256, 512
nfms = list(itt.chain.from_iterable(
    [itt.repeat(2**(x + 6), r) for x, r in enumerate(stages)]))
# stride 2 whenever the feature-map count steps up; leading -1 for the
# first module (consumed positionally by module_factory)
strides = [-1] + [1 if cur == prev else 2
                  for cur, prev in zip(nfms[1:], nfms[:-1])]

for nfm, stride in zip(nfms, strides):
    layers.append(module_factory(nfm, stride))
layers.append(Pooling('all', op='avg'))