def inception_bare(ref_module, kvals, name="i"):
    (p1, p2, p3) = kvals

    branch1 = [Conv(fshape(1, p1[0]), **common)] if p1[0] else []
    branch2 = [Conv(fshape(1, p2[0]), **common), Conv(fshape(3, p2[1]), **commonp1)]
    branch3 = [Pooling(op=p3[0], **pool3s1p1)] + \
        ([Conv(fshape(1, p3[1]), **common)] if p3[1] else [])

    branch1 = Sequential(branch1)
    branch2 = Sequential(branch2)
    branch3 = Sequential(branch3)

    (branch1_ref, branch2_ref, branch3_ref) = ref_module[0].layers

    if p1[0]:
        for ll, lr in zip(branch1.layers, branch1_ref.layers):
            if ll.has_params:
                ll.set_params({'params': {'W': lr.W.get()}})

    for ll, lr in zip(branch2.layers, branch2_ref.layers):
        if ll.has_params:
            ll.set_params({'params': {'W': lr.W.get()}})

    if p3[1]:
        for ll, lr in zip(branch3.layers, branch3_ref.layers):
            if ll.has_params:
                ll.set_params({'params': {'W': lr.W.get()}})

    return (branch1.layers, branch2.layers, branch3.layers)
def __init__(self):
    self.in_shape = [1024, (2538, 38)]

    init = Constant(0)
    image_path = Sequential([Affine(20, init, bias=init),
                             Affine(10, init, bias=init)])
    sent_path = Sequential([Affine(30, init, bias=init),
                            Affine(10, init)])

    layers = [
        MergeMultistream(layers=[image_path, sent_path], merge="recurrent"),
        Dropout(keep=0.5),
        LSTM(4, init, activation=Logistic(), gate_activation=Tanh(),
             reset_cells=True),
        Affine(20, init, bias=init, activation=Softmax())
    ]
    self.layers = layers
    self.cost = GeneralizedCostMask(CrossEntropyMulti())

    self.model = Model(layers=layers)
    self.model.initialize(self.in_shape, cost=self.cost)
def inception(kvals, name="i"):
    (p1, p2, p3) = kvals

    branch1 = [Sequential([Conv(fshape(1, p1[0]), **common)])] if p1[0] else []
    branch2 = [Sequential([Conv(fshape(1, p2[0]), **common),
                           Conv(fshape(3, p2[1]), **commonp1)])]
    branch3 = [Sequential([Pooling(op=p3[0], **pool3s1p1)] +
                          ([Conv(fshape(1, p3[1]), **common)] if p3[1] else []))]

    partitions = branch1 + branch2 + branch3
    return [MergeBroadcast(layers=partitions, merge="depth")]
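Note that inception() and inception_bare() rely on helpers (fshape, common, commonp1, pool3s1p1) that are defined elsewhere in the original script and are not shown here. A minimal sketch of plausible definitions, modeled on neon's GoogLeNet-style Inception examples; the exact initializer and activation choices are assumptions, not taken from this code:

# Hypothetical definitions for the helpers assumed by inception()/inception_bare().
from neon.initializers import Xavier, Constant
from neon.transforms import Rectlin


def fshape(rs, k):
    # square filter of side rs with k output feature maps
    return (rs, rs, k)


# shared keyword arguments for the Conv branches (values are illustrative)
common = dict(activation=Rectlin(), init=Xavier(local=True), bias=Constant(0))
commonp1 = dict(activation=Rectlin(), init=Xavier(local=True), bias=Constant(0), padding=1)

# 3x3 pooling, stride 1, padding 1, as the name suggests
pool3s1p1 = dict(fshape=3, strides=1, padding=1)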
def test_concat_l1_l1(backend_default, allrand_args):
    # test two linear layers that are merged with concat
    dtypeu = np.float32
    w_rng, rngmax = allrand_args
    # Diff size inputs and outputs
    nins = [128, 1024]
    nouts = [64, 2048]
    batch_size = 16
    NervanaObject.be.bsz = batch_size
    be = NervanaObject.be

    init_unif = Uniform(low=w_rng[0], high=w_rng[1])
    layers = [Sequential(Affine(nout=nout, init=init_unif)) for nout in nouts]
    inputs = [be.array(dtypeu(np.random.random((nin, batch_size)))) for nin in nins]
    merge = MergeMultistream(layers, merge="stack")
    assert(len(inputs) == len(layers))
    merge.configure(inputs)
    merge.allocate()
    merge.set_deltas(None)
    out = merge.fprop(inputs).get()

    sublayers = [s.layers[0] for s in layers]
    weights = [layer.W.get() for layer in sublayers]
    out_exp = np.concatenate([np.dot(w, inp.get()) for (w, inp) in zip(weights, inputs)])

    assert np.allclose(out, out_exp, atol=1e-3)

    err_lst = [dtypeu(np.random.random((nout, batch_size))) for nout in nouts]
    err_concat = np.concatenate(err_lst)
    merge.bprop(be.array(err_concat))
    dW_exp_lst = [np.dot(err, inp.get().T) for (err, inp) in zip(err_lst, inputs)]

    for layer, dW_exp in zip(sublayers, dW_exp_lst):
        assert np.allclose(layer.dW.get(), dW_exp)
    return
def __init__(self, layers, dataset=None, weights_only=False, name="model", optimizer=None):
    super(Model, self).__init__(name)
    self.optimizer = optimizer
    self.params = None  # should be able to remove
    self.states = None  # should be able to remove
    self.epoch_index = 0
    self.finished = False
    self.initialized = False
    self.cost = None
    self.nbatches = 0
    self.ndata = 0

    if dataset is not None:
        logger.warning('dataset is a deprecated argument and will be ignored')

    if type(layers) in (ModelDescription, dict):
        # load up the model from a serialized file (dataset could be None here)
        self.deserialize(layers, load_states=(not weights_only))
    elif type(layers) is str:
        self.load_params(layers, load_states=(not weights_only))
    else:
        # Wrap the list of layers in a Sequential container if a raw list of layers
        if type(layers) in (Sequential, Tree, SingleOutputTree):
            self.layers = layers
        else:
            self.layers = Sequential(layers)
    self.layers.propagate_parallelism("Data")
def __init__(self, layers, name="model", optimizer=None):
    super(Model, self).__init__(name)
    self.optimizer = optimizer
    self.params = None  # should be able to remove
    self.states = None  # should be able to remove
    self.epoch_index = 0
    self.finished = False
    self.initialized = False
    self.cost = None

    # Wrap the list of layers in a Sequential container if a raw list of layers
    self.layers = layers if type(layers) in (Sequential, Tree) else Sequential(layers)
    self.layers_to_optimize = self.layers.layers_to_optimize
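The constructor above wraps a raw list of layers in a Sequential container, so callers can pass either form. A minimal usage sketch; the layer sizes and initializer are illustrative, not taken from the original:

# Hypothetical usage: these two constructions are equivalent because a plain
# list of layers is wrapped in Sequential automatically.
from neon.initializers import Gaussian
from neon.layers import Affine, Sequential
from neon.models import Model
from neon.transforms import Rectlin, Softmax

init = Gaussian(scale=0.01)
mlp_layers = [Affine(nout=100, init=init, activation=Rectlin()),
              Affine(nout=10, init=init, activation=Softmax())]

model_a = Model(layers=mlp_layers)              # raw list, wrapped internally
model_b = Model(layers=Sequential(mlp_layers))  # explicit Sequential container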
def test_concat_sequence_l1_l1(backend_default, allrand_args, deltas_buffer):
    # test two linear layers that are merged with concat
    dtypeu = np.float32
    w_rng, rngmax = allrand_args
    # Diff size input steps
    nin = 128
    steps = [32, 64]
    nout = 256
    batch_size = 16
    NervanaObject.be.bsz = batch_size
    be = NervanaObject.be

    init_unif = Uniform(low=w_rng[0], high=w_rng[1])
    layers = [Sequential(Affine(nout=nout, init=init_unif)) for _ in (0, 1)]
    inputs = [be.array(dtypeu(np.random.random((nin, batch_size * step))))
              for step in steps]
    merge = MergeMultistream(layers, merge="recurrent")
    assert (len(inputs) == len(layers))
    merge.configure(inputs)
    merge.allocate()

    merge.allocate_deltas(deltas_buffer)
    deltas_buffer.allocate_buffers()
    merge.set_deltas(deltas_buffer)

    out = merge.fprop(inputs).get()

    sublayers = [s.layers[0] for s in layers]
    weights = [layer.W.get() for layer in sublayers]
    out_exp = np.concatenate([np.dot(w, inp.get()) for (w, inp) in zip(weights, inputs)],
                             axis=1)

    assert allclose_with_out(out, out_exp, atol=1e-3)

    err_lst = [dtypeu(np.random.random((nout, batch_size * step))) for step in steps]
    err_concat = be.array(np.concatenate(err_lst, axis=1))
    merge.bprop(err_concat)
    dW_exp_lst = [np.dot(err, inp.get().T) for (err, inp) in zip(err_lst, inputs)]

    for layer, dW_exp in zip(sublayers, dW_exp_lst):
        assert allclose_with_out(layer.dW.get(), dW_exp)
    return
def __init__(self, layers, dataset=None, weights_only=False, name="model", optimizer=None):
    super(Model, self).__init__(name)
    self.optimizer = optimizer
    self.params = None  # should be able to remove
    self.states = None  # should be able to remove
    self.epoch_index = 0
    self.finished = False
    self.initialized = False
    self.cost = None
    self.weights_only = weights_only
    self.nbatches = 0
    self.ndata = 0

    if type(layers) is ModelDescription or type(layers) is dict:
        # load up the model from a serialized file (dataset could be None here)
        load_states = not self.weights_only
        self.deserialize(layers, dataset, load_states)
    else:
        # Wrap the list of layers in a Sequential container if a raw list of layers
        if type(layers) in (Sequential, Tree, SingleOutputTree):
            self.layers = layers
        else:
            self.layers = Sequential(layers)
    self.layers_to_optimize = self.layers.layers_to_optimize
def test_model_serialize(backend_default, data):
    (X_train, y_train), (X_test, y_test), nclass = load_mnist(path=data)

    train_set = ArrayIterator([X_train, X_train], y_train, nclass=nclass,
                              lshape=(1, 28, 28))

    init_norm = Gaussian(loc=0.0, scale=0.01)

    # initialize model
    path1 = Sequential([Conv((5, 5, 16), init=init_norm, bias=Constant(0),
                             activation=Rectlin()),
                        Pooling(2),
                        Affine(nout=20, init=init_norm, bias=init_norm,
                               activation=Rectlin())])
    path2 = Sequential([Affine(nout=100, init=init_norm, bias=Constant(0),
                               activation=Rectlin()),
                        Dropout(keep=0.5),
                        Affine(nout=20, init=init_norm, bias=init_norm,
                               activation=Rectlin())])
    layers = [MergeMultistream(layers=[path1, path2], merge="stack"),
              Affine(nout=20, init=init_norm, batch_norm=True, activation=Rectlin()),
              Affine(nout=10, init=init_norm, activation=Logistic(shortcut=True))]

    tmp_save = 'test_model_serialize_tmp_save.pickle'
    mlp = Model(layers=layers)
    mlp.optimizer = GradientDescentMomentum(learning_rate=0.1, momentum_coef=0.9)
    mlp.cost = GeneralizedCost(costfunc=CrossEntropyBinary())
    mlp.initialize(train_set, cost=mlp.cost)
    n_test = 3
    num_epochs = 3

    # Train model for num_epochs and n_test batches
    for epoch in range(num_epochs):
        for i, (x, t) in enumerate(train_set):
            x = mlp.fprop(x)
            delta = mlp.cost.get_errors(x, t)
            mlp.bprop(delta)
            mlp.optimizer.optimize(mlp.layers_to_optimize, epoch=epoch)
            if i > n_test:
                break

    # Get expected outputs of n_test batches and states of all layers
    outputs_exp = []
    pdicts_exp = [l.get_params_serialize() for l in mlp.layers_to_optimize]
    for i, (x, t) in enumerate(train_set):
        outputs_exp.append(mlp.fprop(x, inference=True))
        if i > n_test:
            break

    # Serialize model
    mlp.save_params(tmp_save, keep_states=True)

    # Load model
    mlp = Model(tmp_save)
    mlp.initialize(train_set)

    outputs = []
    pdicts = [l.get_params_serialize() for l in mlp.layers_to_optimize]
    for i, (x, t) in enumerate(train_set):
        outputs.append(mlp.fprop(x, inference=True))
        if i > n_test:
            break

    # Check outputs, states, and params are the same
    for output, output_exp in zip(outputs, outputs_exp):
        assert np.allclose(output.get(), output_exp.get())

    for pd, pd_exp in zip(pdicts, pdicts_exp):
        for s, s_e in zip(pd['states'], pd_exp['states']):
            if isinstance(s, list):  # this is the batch norm case
                for _s, _s_e in zip(s, s_e):
                    assert np.allclose(_s, _s_e)
            else:
                assert np.allclose(s, s_e)
        for p, p_e in zip(pd['params'], pd_exp['params']):
            assert type(p) == type(p_e)
            if isinstance(p, list):  # this is the batch norm case
                for _p, _p_e in zip(p, p_e):
                    assert np.allclose(_p, _p_e)
            elif isinstance(p, np.ndarray):
                assert np.allclose(p, p_e)
            else:
                assert p == p_e

    os.remove(tmp_save)
def create_model(dis_model='dc', gen_model='dc',
                 cost_type='wasserstein', noise_type='normal',
                 im_size=64, n_chan=3, n_noise=100,
                 n_gen_ftr=64, n_dis_ftr=64,
                 depth=4, n_extra_layers=0,
                 batch_norm=True, gen_squash=None, dis_squash=None,
                 dis_iters=5, wgan_param_clamp=None, wgan_train_sched=False):
    """
    Create a GAN model and associated GAN cost function for image generation

    Arguments:
        dis_model (str): Discriminator type, can be 'mlp' for a simple MLP or
                         'dc' for a DC-GAN style model (defaults to 'dc')
        gen_model (str): Generator type, can be 'mlp' for a simple MLP or
                         'dc' for a DC-GAN style model (defaults to 'dc')
        cost_type (str): Cost type, can be 'original' or 'modified' following
                         Goodfellow2014, or 'wasserstein' following Arjovsky2017
                         (defaults to 'wasserstein')
        noise_type (str): Noise distribution, can be 'uniform' or 'normal'
                          (defaults to 'normal')
        im_size (int): Image size (defaults to 64)
        n_chan (int): Number of image channels (defaults to 3)
        n_noise (int): Number of noise dimensions (defaults to 100)
        n_gen_ftr (int): Number of generator feature maps (defaults to 64)
        n_dis_ftr (int): Number of discriminator feature maps (defaults to 64)
        depth (int): Depth of layers in case of MLP (defaults to 4)
        n_extra_layers (int): Number of extra conv layers in case of DC (defaults to 0)
        batch_norm (bool): Enable batch normalization (defaults to True)
        gen_squash (str or None): Squashing function at the end of generator (defaults to None)
        dis_squash (str or None): Squashing function at the end of discriminator (defaults to None)
        dis_iters (int): Number of discriminator (critic) iterations per generator
                         iteration (defaults to 5)
        wgan_param_clamp (float or None): Weight clamp value in case of WGAN, None for others
        wgan_train_sched (bool): Enable training schedule of number of critics (defaults to False)
    """
    assert dis_model in ['mlp', 'dc'], \
        "Unsupported model type for discriminator net, supported: 'mlp' and 'dc'"
    assert gen_model in ['mlp', 'dc'], \
        "Unsupported model type for generator net, supported: 'mlp' and 'dc'"
    assert cost_type in ['original', 'modified', 'wasserstein'], \
        "Unsupported GAN cost function type, supported: 'original', 'modified' and 'wasserstein'"

    # types of final squashing functions
    squash_func = dict(nosquash=Identity(), sym=Tanh(), asym=Logistic())
    if cost_type == 'wasserstein':
        if gen_model == 'mlp':
            gen_squash = gen_squash or 'nosquash'
        elif gen_model == 'dc':
            gen_squash = gen_squash or 'sym'
        dis_squash = dis_squash or 'nosquash'
    else:  # for all GAN costs other than Wasserstein
        gen_squash = gen_squash or 'sym'
        dis_squash = dis_squash or 'asym'

    assert gen_squash in ['nosquash', 'sym', 'asym'], \
        "Unsupported final squashing function for generator," \
        " supported: 'nosquash', 'sym' and 'asym'"
    assert dis_squash in ['nosquash', 'sym', 'asym'], \
        "Unsupported final squashing function for discriminator," \
        " supported: 'nosquash', 'sym' and 'asym'"

    gfa = squash_func[gen_squash]
    dfa = squash_func[dis_squash]

    # create model layers
    if gen_model == 'mlp':
        gen = create_mlp_generator(im_size, n_chan, n_gen_ftr, depth,
                                   batch_norm=False, finact=gfa)
        noise_dim = (n_noise,)
    elif gen_model == 'dc':
        gen = create_dc_generator(im_size, n_chan, n_noise, n_gen_ftr,
                                  n_extra_layers, batch_norm, finact=gfa)
        noise_dim = (n_noise, 1, 1)

    if dis_model == 'mlp':
        dis = create_mlp_discriminator(im_size, n_dis_ftr, depth,
                                       batch_norm=False, finact=dfa)
    elif dis_model == 'dc':
        dis = create_dc_discriminator(im_size, n_chan, n_dis_ftr,
                                      n_extra_layers, batch_norm, finact=dfa)

    layers = GenerativeAdversarial(generator=Sequential(gen, name="Generator"),
                                   discriminator=Sequential(dis, name="Discriminator"))

    return GAN(layers=layers, noise_dim=noise_dim, noise_type=noise_type,
               k=dis_iters, wgan_param_clamp=wgan_param_clamp,
               wgan_train_sched=wgan_train_sched), \
        GeneralizedGANCost(costfunc=GANCost(func=cost_type))
def insert_branch_layer(network, b):
    return Sequential(layers=(network, b))
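insert_branch_layer() simply chains the existing network and the extra layer b inside a new Sequential container. A hypothetical call, with placeholder layers that are not part of the original code:

# Hypothetical usage of insert_branch_layer(); the layers below are illustrative only.
from neon.initializers import Gaussian
from neon.layers import Affine, Sequential
from neon.transforms import Rectlin, Softmax

init = Gaussian(scale=0.01)
network = Sequential([Affine(nout=100, init=init, activation=Rectlin())])
b = Affine(nout=10, init=init, activation=Softmax())

# Returns a new Sequential whose layer list is (network, b)
network_with_branch = insert_branch_layer(network, b)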
# parse the command line arguments
parser = NeonArgparser(__doc__)
args = parser.parse_args()

# hyperparameters
num_epochs = args.epochs

(X_train, y_train), (X_test, y_test), nclass = load_mnist(path=args.data_dir)
train_set = ArrayIterator([X_train, X_train], y_train, nclass=nclass, lshape=(1, 28, 28))
valid_set = ArrayIterator([X_test, X_test], y_test, nclass=nclass, lshape=(1, 28, 28))

# weight initialization
init_norm = Gaussian(loc=0.0, scale=0.01)

# initialize model
path1 = Sequential(layers=[Affine(nout=100, init=init_norm, activation=Rectlin()),
                           Affine(nout=100, init=init_norm, activation=Rectlin())])

path2 = Sequential(layers=[Affine(nout=100, init=init_norm, activation=Rectlin()),
                           Affine(nout=100, init=init_norm, activation=Rectlin())])

layers = [MergeMultistream(layers=[path1, path2], merge="stack"),
          Affine(nout=10, init=init_norm, activation=Logistic(shortcut=True))]

model = Model(layers=layers)
cost = GeneralizedCost(costfunc=CrossEntropyBinary())

# fit and validate
optimizer = GradientDescentMomentum(learning_rate=0.1, momentum_coef=0.9)

# configure callbacks
callbacks = Callbacks(model, eval_set=valid_set, **args.callback_args)
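The snippet above stops after configuring the callbacks. A typical continuation in neon examples would train and evaluate the model roughly as follows; this is an assumed completion, not part of the original script, and it presumes Misclassification is imported from neon.transforms:

# Hypothetical continuation: fit the model and report misclassification error.
model.fit(train_set, optimizer=optimizer, num_epochs=num_epochs,
          cost=cost, callbacks=callbacks)
error_pct = 100 * model.eval(valid_set, metric=Misclassification())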
# setup backend
be = gen_backend(**extract_valid_args(args, gen_backend))

# download dataset
data_path = load_flickr8k(path=args.data_dir)  # Other setnames are flickr30k and coco

# load data
train_set = ImageCaption(path=data_path, max_images=-1)

# weight initialization
init = Uniform(low=-0.08, high=0.08)
init2 = Constant(val=train_set.be.array(train_set.bias_init))

# model initialization
image_path = Sequential([Affine(hidden_size, init, bias=Constant(val=0.0))])
sent_path = Sequential([Affine(hidden_size, init, linear_name='sent')])

layers = [
    MergeMultistream(layers=[image_path, sent_path], merge="recurrent"),
    Dropout(keep=0.5),
    LSTM(hidden_size, init, activation=Logistic(), gate_activation=Tanh(),
         reset_cells=True),
    Affine(train_set.vocab_size, init, bias=init2, activation=Softmax())
]

cost = GeneralizedCostMask(costfunc=CrossEntropyMulti(usebits=True))

# configure callbacks
checkpoint_model_path = "~/image_caption2.pickle"
if args.callback_args['save_path'] is None:
    args.callback_args['save_path'] = checkpoint_model_path
def test_branch_model(backend_gpu):
    be = NervanaObject.be
    trunk = [{'layer': Conv, 'config': dict(fshape=(5, 5, 16), **common)},
             {'layer': Pooling, 'config': dict(op='max', **pool2s1p1)}]

    branch1 = [{'layer': Conv, 'config': dict(fshape=(5, 5, 32), **common)},
               {'layer': Pooling, 'config': dict(op='max', **pool2s1p1)},
               {'layer': Affine, 'config': dict(nout=200, **common)},
               {'layer': Affine, 'config': dict(nout=10, init=init1, activation=relu)}]

    branch2 = [{'layer': Conv, 'config': dict(fshape=(3, 3, 32), **common)},
               {'layer': Pooling, 'config': dict(op='max', **pool2s1p1)},
               {'layer': Affine, 'config': dict(nout=256, **common)},
               {'layer': Affine, 'config': dict(nout=10, init=init1, activation=relu)}]

    alphas = [1, 1]
    neon_layer, t, b1, b2 = make_tree(trunk, branch1, branch2, alphas)

    inshape = (16, 32, 32)
    insize = np.prod(inshape)

    # Let's force bprop deltas computation
    inpa = np.random.random((insize, be.bsz))
    inp = be.array(inpa)

    neon_layer.configure(inshape)
    neon_layer.allocate()
    neon_layer.allocate_deltas()
    neon_out = [i.get() for i in neon_layer.fprop(inp)]

    ref_layers = [Sequential(t), Sequential(b1), Sequential(b2)]
    ref_layers[0].configure(inshape)
    ref_layers[1].configure(ref_layers[0].out_shape)
    ref_layers[2].configure(ref_layers[0].out_shape)
    [r.allocate() for r in ref_layers]
    [r.allocate_deltas() for r in ref_layers]

    # Now copy the weights
    ref_all_layers = ref_layers[0].layers + ref_layers[1].layers + ref_layers[2].layers
    ref_weight_layers = [l for l in ref_all_layers if l.has_params]
    neon_weight_layers = neon_layer.layers_to_optimize
    for rl, nl in zip(ref_weight_layers, neon_weight_layers):
        rl.set_params({'params': {'W': nl.W.get()}})

    # Forward prop
    inp_middle = ref_layers[0].fprop(inp)
    ref_out = [r.fprop(inp_middle).get() for r in ref_layers[1:]]
    for h, r in zip(neon_out, ref_out):
        difference = np.max(np.abs(h - r))
        assert (difference < 1e-9)

    # Back prop
    erra = [np.random.random(ll.shape) for ll in neon_out]
    err = [be.array(e) for e in erra]

    input_layer = neon_layer.layers[0].layers[0]  # reference the trunk, then the root
    input_layer.prev_layer = True
    input_layer.set_deltas([be.iobuf(inshape)])
    neon_layer.bprop(err)
    errp = input_layer.deltas.get()

    for i, r in enumerate(ref_layers):
        r.layers[0].prev_layer = True
        _inshape = inshape if i == 0 else ref_layers[0].out_shape
        r.layers[0].set_deltas([be.iobuf(_inshape)])

    joined_err = be.iobuf(ref_layers[0].out_shape)
    branch_errs = [r.bprop(e, a) for r, e, a in
                   reversed(list(zip(ref_layers[1:], err, alphas)))]
    joined_err[:] = branch_errs[0] + branch_errs[1]

    err_ref = ref_layers[0].bprop(joined_err).get()

    difference = np.max(np.abs(err_ref - errp))
    neon_logger.display("Max difference: {}".format(difference))
    assert (difference < 1e-9)
def mergesum_test_config(modfunc, use_stride=1):
    NervanaObject.be = gen_backend("gpu", batch_size=64)
    be = NervanaObject.be
    l1 = Conv(**conv_params(3, 16))
    neon_layer = modfunc(16, use_stride)
    inshape = (16, 32, 32)
    insize = np.prod(inshape)
    inpa = np.random.random((insize, batch_size))

    neon_seq = Sequential([l1] + neon_layer)
    neon_seq.configure(inshape)
    inp = be.array(inpa)

    neon_seq.allocate()
    # print(neon_layer.nested_str())
    # neon_layer.layers[0].prev_layer = True
    neon_seq.allocate_deltas()
    neon_out = neon_seq.fprop(inp).get()

    # Now make the reference pathways:
    p1, p2 = module_factory_copy(neon_layer, modfunc, 16, use_stride)
    l11 = Conv(**conv_params(3, 16))
    l12 = Conv(**conv_params(3, 16))

    for ll in (l11, l12):
        for lcopy, lref in zip(ll, l1):
            if lcopy.has_params:
                lcopy.set_params(lref.get_params_serialize())

    path1 = Sequential([l11] + p1)
    path2 = Sequential([l12] + p2)
    for ll in (path1, path2):
        ll.configure(inshape)
        ll.allocate()
        ll.allocate_deltas()

    o1 = path1.fprop(inp).get()
    o2 = path2.fprop(inp).get()
    # Now relu it
    neon_out_ref = np.maximum(o1 + o2, 0)
    difference = neon_out_ref - neon_out
    print(np.max(np.abs(difference)))
    # need to have bsum false for this test to be valid
    # assert np.max(np.abs(difference)) < 1e-7
    print("Fprop matching")

    print("Beginning Back prop")
    erra = np.random.random(neon_out.shape)
    err = be.array(erra)

    ebr = neon_seq.layers[4].bprop(err)
    print("Orig Error", ebr.get()[0, :20])
    ebr = neon_seq.layers[3].bprop(ebr)
    trunk_neon = ebr.get()

    err = be.array(erra)
    err[:] = be.greater(be.array(neon_out_ref), 0) * err

    eb1 = err
    for l in reversed(path1.layers[3:]):
        eb1 = l.bprop(eb1)
    t1 = eb1.get()

    err = be.array(erra)
    err[:] = be.greater(be.array(neon_out_ref), 0) * err

    eb2 = err
    for l in reversed(path2.layers[3:]):
        eb2 = l.bprop(eb2)
    t2 = eb2.get()

    print(np.max(np.abs(trunk_neon - (t1 + t2))))
def test_branch_model_cpu(backend_cpu64):
    np.random.seed(0)
    be = NervanaObject.be
    be.bsz = 32
    main1 = main_branch()
    i1 = inception([(32,), (32, 32), ('max', 16)])
    top = top_branch()
    neon_layer = Sequential(main1 + i1 + top)

    inshape = (4, 224, 224)
    insize = np.prod(inshape)
    inpa = np.random.random((insize, batch_size))
    neon_layer.configure(inshape)
    inp = neon_layer.be.array(inpa)

    neon_layer.allocate()
    neon_logger.display(neon_layer.nested_str())
    neon_layer.layers[0].prev_layer = True

    neon_layer.allocate_deltas()

    neon_out = neon_layer.fprop(inp).get()

    # Now make the reference pathways:
    main_trunk2 = Sequential(main_branch())
    main_trunk2.configure(inshape)
    main2 = main_trunk2.layers
    main2[0].prev_layer = True
    main2[0].deltas = be.iobuf(inshape)

    (b1, b2, b3) = inception_bare(i1, [(32,), (32, 32), ('max', 16)])

    for bb in (b1, b2, b3):
        oshape = inshape
        for ll in main2 + bb:
            oshape = ll.configure(oshape)

    main1_trunk = neon_layer.layers[:8]
    for ll, lo in zip(main2, main1_trunk):
        if ll.has_params:
            ll.set_params({'params': {'W': lo.W.get()}})
        ll.allocate()

        temp_buff = DeltasTree()
        ll.allocate_deltas(temp_buff)
        temp_buff.allocate_buffers()
        ll.set_deltas(temp_buff)

    for bb in (b1, b2, b3):
        for ll in bb:
            ll.allocate()
            temp_buff = DeltasTree()
            ll.allocate_deltas(temp_buff)
            temp_buff.allocate_buffers()
            ll.set_deltas(temp_buff)

    # Create the combined output buffer
    merge_output = be.empty_like(neon_layer.layers[8].outputs)

    x = inp
    for ll in main2:
        x = ll.fprop(x)

    start = 0
    for bb in (b1, b2, b3):
        xb = x
        for ll in bb:
            xb = ll.fprop(xb)
        end = start + xb.shape[0]
        merge_output[start:end] = xb
        start = end

    x = merge_output

    top_trunk = Sequential(top).layers
    for ll in top_trunk:
        x = ll.fprop(x)

    neon_out_ref = x.get()
    assert allclose_with_out(neon_out, neon_out_ref, rtol=0)

    neon_logger.display("Beginning Back prop")
    erra = np.random.random(neon_out.shape)
    err = be.array(erra)
    for ll in reversed(neon_layer.layers[8:]):
        err = ll.bprop(err)

    neon_deltas = err.get()
    for bb, errb in zip((b1, b2, b3), neon_layer.layers[8].error_views):
        for ll in reversed(bb):
            errb = ll.bprop(errb)

    # Now sum up the deltas at the root of the branch layer and compare
    ref_deltas = be.zeros_like(b1[0].deltas)
    ref_deltas[:] = b3[0].deltas + b2[0].deltas + b1[0].deltas

    neon_ref_deltas = ref_deltas.get()

    assert allclose_with_out(neon_deltas, neon_ref_deltas, rtol=0)
def test_branch_model_fork(backend_gpu):
    from neon.layers import BranchNode, Tree

    np.random.seed(0)
    be = NervanaObject.be
    if be.gpu_memory_size < 6.1 * 1024 * 1024 * 1024:
        pytest.skip(msg='Test requires more than 6.1GB')
    be.bsz = 64
    bnode = BranchNode()
    i1 = inception([(32,), (32, 32), ('max', 16)])
    top1 = top_branch()
    top2 = top_branch()
    p1 = Sequential(main_branch() + [bnode, i1] + top1)
    p2 = [bnode] + top2

    alpha2 = 0.3
    neon_layer = Tree([p1, p2], alphas=[1.0, alpha2])

    inshape = (4, 224, 224)
    insize = np.prod(inshape)
    inpa = np.random.random((insize, batch_size))
    neon_layer.configure(inshape)
    inp = neon_layer.be.array(inpa)

    neon_layer.allocate()

    neon_layer.layers[0].layers[0].prev_layer = True

    neon_layer.allocate_deltas()

    neon_out_dev = neon_layer.fprop(inp)
    neon_out = [d.get() for d in neon_out_dev]

    # Now make the reference pathways:
    main_trunk2 = Sequential(main_branch())
    main_trunk2.configure(inshape)
    main2 = main_trunk2.layers
    main2[0].prev_layer = True
    main2[0].deltas = be.iobuf(inshape)

    branch2 = Sequential(top_branch())
    lbranch2 = branch2.layers
    (b1, b2, b3) = inception_bare(i1, [(32,), (32, 32), ('max', 16)])

    for bb in (b1, b2, b3, lbranch2):
        oshape = inshape
        for ll in main2 + bb:
            oshape = ll.configure(oshape)

    main1_trunk = neon_layer.layers[0].layers[:8]
    for ll, lo in zip(main2, main1_trunk):
        if ll.has_params:
            ll.set_params({'params': {'W': lo.W.get()}})
        ll.allocate()

        temp_deltas = DeltasTree()
        temp_deltas.proc_layer(ll)
        temp_deltas.allocate_buffers()
        ll.set_deltas(temp_deltas)

    for ll, lo in zip(lbranch2, neon_layer.layers[1].layers[1:]):
        if ll.has_params:
            ll.set_params({'params': {'W': lo.W.get()}})

    for bb in (b1, b2, b3, lbranch2):
        for ll in bb:
            ll.allocate()
            temp_deltas = DeltasTree()
            temp_deltas.proc_layer(ll)
            temp_deltas.allocate_buffers()
            ll.set_deltas(temp_deltas)

    # Create the combined output buffer
    merge_output = be.empty_like(neon_layer.layers[0].layers[9].outputs)

    x = inp
    for ll in main2:
        x = ll.fprop(x)
    main2_out = x

    start = 0
    for bb in (b1, b2, b3):
        xb = main2_out
        for ll in bb:
            xb = ll.fprop(xb)
        end = start + xb.shape[0]
        merge_output[start:end] = xb
        start = end

    x = merge_output

    top_trunk = Sequential(top1).layers
    for ll in top_trunk:
        x = ll.fprop(x)

    neon_out_ref = x.get()
    assert allclose_with_out(neon_out_ref, neon_out[0], rtol=0)

    # Now do second branch
    neon_out_ref2 = branch2.fprop(main2_out).get()
    assert allclose_with_out(neon_out_ref2, neon_out[1])

    neon_logger.display("Beginning Back prop")
    erra = [np.random.random(d.shape) for d in neon_out]
    err = [be.array(d) for d in erra]
    neon_layer.layers[0].layers[0].deltas = be.iobuf(inshape)
    neon_layer.bprop(err)

    bottom_neon_deltas = neon_layer.layers[0].layers[1].deltas.get()
    middle_neon_deltas = neon_layer.layers[1].layers[1].deltas.get()

    err0 = err[0]
    for ll in reversed(top_trunk):
        err0 = ll.bprop(err0)

    err1 = err[1]
    for ll in reversed(lbranch2):
        err1 = ll.bprop(err1)

    for bb, errb in zip((b1, b2, b3), neon_layer.layers[0].layers[-5].error_views):
        for ll in reversed(bb):
            errb = ll.bprop(errb)

    # Now sum up the deltas at the root of the branch layer and compare
    ref_deltas = be.zeros_like(b1[0].deltas)
    ref_deltas[:] = alpha2 * lbranch2[0].deltas
    ref_deltas[:] = ref_deltas + b3[0].deltas + b2[0].deltas + b1[0].deltas

    neon_ref_deltas = ref_deltas.get()
    assert allclose_with_out(middle_neon_deltas, neon_ref_deltas, rtol=0)

    x = ref_deltas
    main2[0].deltas = be.iobuf(inshape)

    for ll in reversed(main2):
        x = ll.bprop(x)

    bottom_neon_ref_deltas = main2[1].deltas.get()
    assert allclose_with_out(bottom_neon_deltas, bottom_neon_ref_deltas, rtol=0)
def mergesum_test_config(be, modfunc, use_stride=1):
    l1 = Conv(**conv_params(3, 16))
    neon_layer = modfunc(16, use_stride)
    inshape = (16, 32, 32)
    insize = np.prod(inshape)
    inpa = np.random.random((insize, batch_size))

    neon_seq = Sequential([l1] + neon_layer)
    neon_seq.configure(inshape)
    inp = be.array(inpa)

    neon_seq.allocate()
    # neon_layer.layers[0].prev_layer = True
    neon_seq.allocate_deltas()
    neon_out = neon_seq.fprop(inp).get()

    # Now make the reference pathways:
    p1, p2 = module_factory_copy(neon_layer, modfunc, 16, use_stride)
    l11 = Conv(**conv_params(3, 16))
    l12 = Conv(**conv_params(3, 16))

    for ll in (l11, l12):
        for lcopy, lref in zip(ll, l1):
            if lcopy.has_params:
                lcopy.set_params(lref.get_params_serialize())

    path1 = Sequential([l11] + p1)
    path2 = Sequential([l12] + p2)
    for ll in (path1, path2):
        ll.configure(inshape)
        ll.allocate()
        ll.allocate_deltas()

    o1 = path1.fprop(inp)
    o2 = path2.fprop(inp)

    # convert mkl buffer to cpu for following cpu execution
    be.convert_data(o1, False)
    be.convert_data(o2, False)

    neon_out_ref = be.empty_like(o1)
    neon_out_ref[:] = be.maximum(o1 + o2, 0)

    # need to have bsum false for this test to be valid
    assert allclose_with_out(neon_out_ref.get(), neon_out, rtol=0)

    erra = np.random.random(neon_out.shape)
    err = be.array(erra)

    ebr = neon_seq.layers[-1].bprop(err)
    ebr = neon_seq.layers[-2].bprop(ebr)
    trunk_neon = ebr.get()

    err = be.array(erra)
    err[:] = be.greater(neon_out_ref, 0) * err

    pstart = len(l1)
    eb1 = err
    for l in reversed(path1.layers[pstart:]):
        eb1 = l.bprop(eb1)

    eb2 = err
    for l in reversed(path2.layers[pstart:]):
        eb2 = l.bprop(eb2)

    be.convert_data(eb1, False)
    be.convert_data(eb2, False)

    err_ref = be.empty_like(eb1)
    err_ref[:] = eb1 + eb2

    assert allclose_with_out(err_ref.get(), trunk_neon, rtol=0)
def test_branch_model():
    NervanaObject.be = gen_backend("gpu", batch_size=64)
    be = NervanaObject.be
    main1 = main_branch()
    i1 = inception([(32,), (32, 32), ('max', 16)])
    top = top_branch()
    neon_layer = Sequential(main1 + i1 + top)

    inshape = (3, 224, 224)
    insize = np.prod(inshape)
    inpa = np.random.random((insize, batch_size))
    neon_layer.configure(inshape)
    inp = neon_layer.be.array(inpa)

    neon_layer.allocate()
    print(neon_layer.nested_str())
    neon_layer.layers[0].prev_layer = True
    neon_layer.allocate_deltas()
    neon_layer.layers[0].set_deltas([be.iobuf(inshape)])
    neon_out = neon_layer.fprop(inp).get()

    # Now make the reference pathways:
    main_trunk2 = Sequential(main_branch())
    main_trunk2.configure(inshape)
    main2 = main_trunk2.layers
    main2[0].prev_layer = True
    main2[0].set_deltas([be.iobuf(inshape)])
    (b1, b2, b3) = inception_bare(i1, [(32,), (32, 32), ('max', 16)])

    for bb in (b1, b2, b3):
        oshape = inshape
        for ll in main2 + bb:
            oshape = ll.configure(oshape)

    main1_trunk = neon_layer.layers[:8]
    for ll, lo in zip(main2, main1_trunk):
        if ll.has_params:
            ll.set_params({'params': {'W': lo.W.get()}})
        ll.allocate()
        ll.set_deltas([be.iobuf(ll.in_shape)])

    for bb in (b1, b2, b3):
        for ll in bb:
            ll.allocate()
            ll.set_deltas([be.iobuf(ll.in_shape)])

    # Create the combined output buffer
    merge_output = be.empty_like(neon_layer.layers[8].outputs)

    x = inp
    for ll in main2:
        x = ll.fprop(x)

    start = 0
    for bb in (b1, b2, b3):
        xb = x
        for ll in bb:
            xb = ll.fprop(xb)
        end = start + xb.shape[0]
        merge_output[start:end] = xb
        start = end

    x = merge_output

    top_trunk = Sequential(top).layers
    for ll in top_trunk:
        x = ll.fprop(x)

    neon_out_ref = x.get()
    difference = neon_out_ref - neon_out
    assert np.max(np.abs(difference)) < 1e-7
    print(np.max(np.abs(difference)))

    print("Beginning Back prop")
    erra = np.random.random(neon_out.shape)
    err = be.array(erra)
    for ll in reversed(neon_layer.layers[8:]):
        err = ll.bprop(err)

    neon_deltas = err.get()

    for bb, errb in zip((b1, b2, b3), neon_layer.layers[8].error_views):
        for ll in reversed(bb):
            errb = ll.bprop(errb)

    # Now sum up the deltas at the root of the branch layer and compare
    ref_deltas = be.zeros_like(b1[0].deltas)
    ref_deltas[:] = b1[0].deltas + b2[0].deltas + b3[0].deltas
    neon_ref_deltas = ref_deltas.get()
    difference = neon_deltas - neon_ref_deltas

    print(np.max(np.abs(difference)))
    assert np.max(np.abs(difference)) < 1e-8
conv4 = dict(init=init, batch_norm=True, activation=lrelu,
             dilation=dict(dil_h=2, dil_w=2, dil_d=2))
conv5 = dict(init=init, batch_norm=True, activation=lrelu,
             padding=dict(pad_h=2, pad_w=2, pad_d=0),
             dilation=dict(dil_h=2, dil_w=2, dil_d=3))
conv6 = dict(init=init, batch_norm=False, activation=lrelu,
             padding=dict(pad_h=1, pad_w=0, pad_d=3))

G_layers = [
    Linear(64 * 7 * 7, init=init),  # what about the input volume?
    Reshape((7, 7, 8, 8)),
    Conv((6, 6, 8, 64), **conv4),
    Conv((6, 5, 8, 6), **conv5),
    Conv((3, 3, 8, 6), **conv6),
    Conv((2, 2, 2, 1), init=init, batch_norm=False, activation=relu)
]  # what about an Embedding layer?

layers = GenerativeAdversarial(generator=Sequential(G_layers, name="Generator"),
                               discriminator=Sequential(D_layers, name="Discriminator"))

# setup cost function as CrossEntropy
cost = GeneralizedGANCost(costfunc=GANCost(func="modified"))
def test_branch_model_fork():
    from neon.layers import BranchNode, Tree

    NervanaObject.be = gen_backend("gpu", batch_size=64)
    be = NervanaObject.be
    bnode = BranchNode()
    i1 = inception([(32,), (32, 32), ('max', 16)])
    top1 = top_branch()
    top2 = top_branch()
    p1 = Sequential(main_branch() + [bnode, i1] + top1)
    p2 = [bnode] + top2

    alpha2 = 0.3
    neon_layer = Tree([p1, p2], alphas=[1.0, alpha2])

    inshape = (3, 224, 224)
    insize = np.prod(inshape)
    inpa = np.random.random((insize, batch_size))
    neon_layer.configure(inshape)
    inp = neon_layer.be.array(inpa)

    neon_layer.allocate()
    print(neon_layer.nested_str())
    neon_layer.layers[0].layers[0].prev_layer = True
    neon_layer.allocate_deltas()
    neon_layer.layers[0].layers[0].set_deltas([be.iobuf(inshape)])
    neon_out_dev = neon_layer.fprop(inp)
    neon_out = [d.get() for d in neon_out_dev]

    # Now make the reference pathways:
    main_trunk2 = Sequential(main_branch())
    main_trunk2.configure(inshape)
    main2 = main_trunk2.layers
    main2[0].prev_layer = True
    main2[0].set_deltas([be.iobuf(inshape)])

    branch2 = Sequential(top_branch())
    lbranch2 = branch2.layers
    (b1, b2, b3) = inception_bare(i1, [(32,), (32, 32), ('max', 16)])

    for bb in (b1, b2, b3, lbranch2):
        oshape = inshape
        for ll in main2 + bb:
            oshape = ll.configure(oshape)

    main1_trunk = neon_layer.layers[0].layers[:8]
    for ll, lo in zip(main2, main1_trunk):
        if ll.has_params:
            ll.set_params({'params': {'W': lo.W.get()}})
        ll.allocate()
        ll.set_deltas([be.iobuf(ll.in_shape)])

    for ll, lo in zip(lbranch2, neon_layer.layers[1].layers[1:]):
        if ll.has_params:
            ll.set_params({'params': {'W': lo.W.get()}})

    for bb in (b1, b2, b3, lbranch2):
        for ll in bb:
            ll.allocate()
            ll.set_deltas([be.iobuf(ll.in_shape)])

    # Create the combined output buffer
    merge_output = be.empty_like(neon_layer.layers[0].layers[9].outputs)

    x = inp
    for ll in main2:
        x = ll.fprop(x)
    main2_out = x

    start = 0
    for bb in (b1, b2, b3):
        xb = main2_out
        for ll in bb:
            xb = ll.fprop(xb)
        end = start + xb.shape[0]
        merge_output[start:end] = xb
        start = end

    x = merge_output

    top_trunk = Sequential(top1).layers
    for ll in top_trunk:
        x = ll.fprop(x)

    neon_out_ref = x.get()
    difference = neon_out_ref - neon_out[0]
    assert np.max(np.abs(difference)) < 1e-7
    print(np.max(np.abs(difference)))

    # Now do second branch
    neon_out_ref2 = branch2.fprop(main2_out).get()
    difference = neon_out_ref2 - neon_out[1]
    assert np.max(np.abs(difference)) < 1e-7
    print(np.max(np.abs(difference)))

    print("Beginning Back prop")
    erra = [np.random.random(d.shape) for d in neon_out]
    err = [be.array(d) for d in erra]
    neon_layer.layers[0].layers[0].deltas = be.iobuf(inshape)
    neon_layer.bprop(err)

    bottom_neon_deltas = neon_layer.layers[0].layers[1].deltas.get()
    middle_neon_deltas = neon_layer.layers[1].layers[1].deltas.get()

    err0 = err[0]
    for ll in reversed(top_trunk):
        err0 = ll.bprop(err0)

    err1 = err[1]
    for ll in reversed(lbranch2):
        err1 = ll.bprop(err1)

    for bb, errb in zip((b1, b2, b3), neon_layer.layers[0].layers[-5].error_views):
        for ll in reversed(bb):
            errb = ll.bprop(errb)

    # Now sum up the deltas at the root of the branch layer and compare
    ref_deltas = be.zeros_like(b1[0].deltas)
    ref_deltas[:] = b1[0].deltas + b2[0].deltas + b3[0].deltas + alpha2 * lbranch2[0].deltas
    neon_ref_deltas = ref_deltas.get()
    difference = middle_neon_deltas - neon_ref_deltas

    print(np.max(np.abs(difference)))
    assert np.max(np.abs(difference)) < 1e-8

    x = ref_deltas
    main2[0].deltas = be.iobuf(inshape)
    for ll in reversed(main2):
        x = ll.bprop(x)

    bottom_neon_ref_deltas = main2[1].deltas.get()
    difference = bottom_neon_deltas - bottom_neon_ref_deltas
    print(np.max(np.abs(difference)))
    assert np.max(np.abs(difference)) < 1e-8