def test_dconv_zeros(backend, zeros_convargs):
    fshape, nofm, batch_size = zeros_convargs

    NervanaObject.be.bsz = NervanaObject.be.bs = batch_size
    dtypeu = np.float32

    # all weights initialized to zero
    init_unif = Uniform(low=0.0, high=0.0)
    inshape = (64, 28, 28)
    insize = np.prod(inshape)

    neon_layer = Deconv(fshape=(fshape, fshape, nofm),
                        strides=1, padding=0, init=init_unif)

    inp_arr_shape = (insize, batch_size)
    inp = np.random.random(inp_arr_shape).astype(dtypeu)
    inp = neon_layer.be.array(inp)
    inp.lshape = inshape

    # fprop through zero weights must yield all zeros
    outa = neon_layer.fprop(inp)
    out = outa.asnumpyarray()
    assert np.min(out) == 0.0 and np.max(out) == 0.0

    # bprop a zero error signal; deltas and weight updates must be zero too
    err = dtypeu(np.zeros(outa.shape))
    deltas = neon_layer.bprop(NervanaObject.be.array(err)).asnumpyarray()
    assert np.min(deltas) == 0.0 and np.max(deltas) == 0.0

    dw = neon_layer.dW.asnumpyarray()
    assert np.min(dw) == 0.0 and np.max(dw) == 0.0
    return
def test_dconv_ones(backend, ones_convargs):
    indim, nifm, fshape, nofm, batch_size = ones_convargs

    NervanaObject.be.bsz = NervanaObject.be.bs = batch_size
    dtypeu = np.float32

    # weights set to one
    init_unif = Uniform(low=1.0, high=1.0)
    inshape = (nifm, indim, indim)
    insize = np.prod(inshape)

    neon_layer = Deconv(fshape=(fshape, fshape, nofm),
                        strides=1, padding=0, init=init_unif)
    inp = neon_layer.be.array(np.ones((insize, batch_size)).astype(dtypeu))
    inp.lshape = inshape

    # run fprop
    out = neon_layer.fprop(inp).asnumpyarray()
    out_exp_min = nifm
    out_exp_max = fshape * fshape * nifm
    assert np.min(out) == out_exp_min and np.max(out) == out_exp_max

    # generate err array
    err = np.ones(out.shape).astype(dtypeu)

    # run bprop
    neon_layer.bprop(NervanaObject.be.array(err)).asnumpyarray()
    dw = neon_layer.dW.asnumpyarray()

    # generate the reference layer
    ref_layer = DeconvRefLayer(1, batch_size, identity, inshape[0],
                               inshape[1:3], (fshape, fshape), nofm, 1, dtypeu)
    ref_layer.weights = np.ones(neon_layer.W.shape).T.astype(dtypeu)

    # run bprop
    ref_layer.bprop(err)

    # expected output for updates is a uniform matrix with
    # all elements == ofmsize * batch_size
    updates_exp = ref_layer.ofmsize * batch_size

    # check dW from the neon layer
    assert np.max(dw) == updates_exp and np.min(dw) == updates_exp

    # no tolerance here; this should be exact
    assert np.max(np.abs(ref_layer.y.T - neon_layer.deltas.get())) == 0.0
    return
def test_dconv_rand(backend, rand_convargs):
    indim, nifm, fshape, nofm, batch_size, rngmax, w_rng = rand_convargs

    NervanaObject.be.bsz = NervanaObject.be.bs = batch_size
    dtypeu = np.float32
    inp_rng = [0.0, rngmax]

    init_unif = Uniform(low=w_rng[0], high=w_rng[1])
    inshape = (indim, indim, nifm)
    insize = np.prod(inshape)

    # generate neon deconv layer
    # need to switch to nofm here...
    neon_layer = Deconv(fshape=(fshape, fshape, nofm),
                        strides=1, padding=0, init=init_unif)

    # generate reference deconv layer
    ref_layer = DeconvRefLayer(1, batch_size, identity, inshape[0],
                               inshape[1:3], (fshape, fshape), nofm, 1, dtypeu)

    # setup input in range inp_rng
    inpa = np.random.random((insize, batch_size))
    inpa *= inp_rng[1] - inp_rng[0]
    inpa += inp_rng[0]
    inpa = inpa.astype(dtypeu)
    inp = neon_layer.be.array(inpa)
    inp.lshape = inshape

    # run fprop on neon
    neon_out = neon_layer.fprop(inp).asnumpyarray()

    # pull neon weights into ref layer weights and run the reference fprop
    ref_layer.weights = neon_layer.W.asnumpyarray().T
    ref_layer.fprop(inpa.T)
    ref_out = np.copy(ref_layer.berror)

    # estimate the numerical precision by rerunning the reference fprop
    # with permuted operands
    ref_layer.fprop(inpa.T, permute=True)
    ref_out2 = ref_layer.berror
    atol = 10 * np.max(np.abs(ref_out - ref_out2))
    assert np.allclose(ref_out.T, neon_out, atol=atol, rtol=0.0), \
        '%e %e' % (np.max(np.abs(ref_out.T - neon_out)), atol)

    # generate err array in the same range as the input
    erra = np.random.random(neon_out.shape)
    erra *= inp_rng[1] - inp_rng[0]
    erra += inp_rng[0]
    erra = erra.astype(dtypeu)
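    # A hedged sketch of the remaining bprop comparison, assuming
    # DeconvRefLayer exposes the same bprop()/y attributes exercised in
    # test_dconv_ones above; the exact checks here are assumptions, not
    # recoverable from this excerpt:
    neon_deltas = neon_layer.bprop(NervanaObject.be.array(erra)).asnumpyarray()
    ref_layer.bprop(erra)
    assert np.allclose(ref_layer.y.T, neon_deltas, atol=atol, rtol=0.0)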
# Set input and target to X_train
train = ArrayIterator(X_train, lshape=(1, 28, 28))

# Initialize the weights and the learning rule
init_uni = Uniform(low=-0.1, high=0.1)
opt_gdm = GradientDescentMomentum(learning_rate=0.001, momentum_coef=0.9)

# Strided conv autoencoder
bn = False
layers = [Conv((4, 4, 8), init=init_uni, activation=Rectlin(), batch_norm=bn),
          Pooling(2),
          Conv((4, 4, 32), init=init_uni, activation=Rectlin(), batch_norm=bn),
          Pooling(2),
          Deconv(fshape=(4, 4, 8), init=init_uni, activation=Rectlin(),
                 batch_norm=bn),
          Deconv(fshape=(3, 3, 8), init=init_uni, activation=Rectlin(),
                 strides=2, batch_norm=bn),
          Deconv(fshape=(2, 2, 1), init=init_uni, strides=2, padding=1)]

# Define the cost
cost = GeneralizedCost(costfunc=SumSquared())

model = Model(layers=layers)

# configure callbacks
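# A minimal sketch of the remaining training steps, assuming the same
# Callbacks/fit API used by the neighboring examples in this section
# (args holding parsed command-line options is an assumption):
callbacks = Callbacks(model, train, **args.callback_args)
model.fit(train, optimizer=opt_gdm, num_epochs=args.epochs, cost=cost,
          callbacks=callbacks)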
# setup weight initialization function
init = Gaussian(scale=0.05)

# generator using "deconvolution" layers
relu = Rectlin(slope=0)  # relu for generator
conv = dict(init=init, batch_norm=True, activation=relu)
convp1 = dict(init=init, batch_norm=True, activation=relu, padding=1)
convp2 = dict(init=init, batch_norm=True, activation=relu, padding=2)
convp1s2 = dict(init=init, batch_norm=True, activation=relu,
                padding=1, strides=2)

G_layers = [Deconv((1, 1, 16), name="G11", **conv),
            Deconv((3, 3, 192), name="G12", **convp1),
            Deconv((3, 3, 192), name="G21", **convp1s2),
            Deconv((3, 3, 192), name="G22", **convp1),
            Deconv((3, 3, 96), name="G31", **convp1s2),
            Deconv((3, 3, 96), name="G32", **conv),
            Deconv((3, 3, 1), name="G_out", init=init, batch_norm=False,
                   padding=1, activation=Logistic(shortcut=False))]

# discriminator using convolution layers
lrelu = Rectlin(slope=0.1)  # leaky relu for discriminator
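# A purely illustrative sketch of how the convolutional discriminator might
# continue, reusing the dict-of-kwargs pattern from the generator; the layer
# sizes below are assumptions, not the original architecture:
D_conv = dict(init=init, batch_norm=True, activation=lrelu, padding=1)
D_layers = [Conv((3, 3, 96), name="D11", **D_conv),
            Conv((3, 3, 192), name="D21", strides=2, **D_conv),
            Affine(1, init=init, activation=Logistic())]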
pad1 = dict(pad_h=2, pad_w=2, pad_d=2)
str1 = dict(str_h=2, str_w=2, str_d=2)
conv1 = dict(init=init_gen, batch_norm=False, activation=lrelu,
             padding=pad1, strides=str1, bias=init_gen)

pad2 = dict(pad_h=2, pad_w=2, pad_d=2)
str2 = dict(str_h=2, str_w=2, str_d=2)
conv2 = dict(init=init_gen, batch_norm=False, activation=lrelu,
             padding=pad2, strides=str2, bias=init_gen)

pad3 = dict(pad_h=0, pad_w=0, pad_d=0)
str3 = dict(str_h=1, str_w=1, str_d=1)
conv3 = dict(init=init_gen, batch_norm=False, activation=Tanh(),
             padding=pad3, strides=str3, bias=init_gen)

bg = BranchNode("bg")
branchg = [bg,
           Affine(1024, init=init_gen, bias=init_gen, activation=relu),
           BatchNorm(),
           Affine(8 * 7 * 7 * 7, init=init_gen, bias=init_gen),
           Reshape((8, 7, 7, 7)),
           Deconv((6, 6, 6, 6), **conv1),  # 14x14x14
           BatchNorm(),
           # Linear(5 * 14 * 14 * 14, init=init),
           # Reshape((5, 14, 14, 14)),
           Deconv((5, 5, 5, 64), **conv2),  # 27x27x27
           BatchNorm(),
           Conv((3, 3, 3, 1), **conv3)]
G_layers = Tree([branchg], name="Generator")

print(D_layers)
print(G_layers)

layers = myGenerativeAdversarial(generator=G_layers, discriminator=D_layers)
# discriminator=Sequential(D_layers, name="Discriminator"))
print('layers defined')
(X_train, y_train), (X_test, y_test), nclass = load_mnist(path=args.data_dir)

# Set input and target to X_train
train = DataIterator(X_train, lshape=(1, 28, 28))

# Initialize the weights and the learning rule
init_uni = Uniform(low=-0.1, high=0.1)
opt_gdm = GradientDescentMomentum(learning_rate=0.001, momentum_coef=0.9)

# Define the layers
layers = [Conv((4, 4, 8), init=init_uni, activation=Rectlin()),
          Pooling(2),
          Conv((4, 4, 32), init=init_uni, activation=Rectlin()),
          Pooling(2),
          Deconv(fshape=(3, 3, 8), init=init_uni, strides=2, padding=1),
          Deconv(fshape=(3, 3, 8), init=init_uni, strides=2, padding=1),
          Deconv(fshape=(4, 4, 1), init=init_uni, strides=2, padding=0)]

# Define the cost
cost = GeneralizedCost(costfunc=SumSquared())

mlp = Model(layers=layers)

# Fit the model; configure callbacks first
callbacks = Callbacks(mlp, train, **args.callback_args)
mlp.fit(train, optimizer=opt_gdm, num_epochs=args.epochs, cost=cost,
        callbacks=callbacks)
(X_train, y_train), (X_test, y_test), nclass = load_mnist(path=args.data_dir)

# Set input and target to X_train
train = DataIterator(X_train, lshape=(1, 28, 28))

# Initialize the weights and the learning rule
init_uni = Uniform(low=-0.1, high=0.1)
opt_gdm = GradientDescentMomentum(learning_rate=0.001, momentum_coef=0.9)

# Define the layers
layers = []
layers.append(Conv((4, 4, 8), init=init_uni, activation=Rectlin()))
layers.append(Pooling(2))
layers.append(Conv((4, 4, 32), init=init_uni, activation=Rectlin()))
layers.append(Pooling(2))
layers.append(Deconv(fshape=(4, 4, 8), init=init_uni))
layers.append(Deconv(fshape=(2, 2, 8), init=init_uni, strides=2))
layers.append(Deconv(fshape=(2, 2, 1), init=init_uni, strides=2))

# Define the cost
cost = GeneralizedCost(costfunc=SumSquared())

mlp = Model(layers=layers)

# Fit the model
# configure callbacks
callbacks = Callbacks(mlp, train, output_file=args.output_file,
                      progress_bar=args.progress_bar)
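# A sketch of the fit call, following the signature used by the neighboring
# examples (num_epochs taken from args is an assumption):
mlp.fit(train, optimizer=opt_gdm, num_epochs=args.epochs, cost=cost,
        callbacks=callbacks)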
train.init_batch_provider()
test.init_batch_provider()

init = Gaussian(scale=0.1)
opt = Adadelta(decay=0.9)
common = dict(init=init, batch_norm=True, activation=Rectlin())

# Set up the model layers: a strided conv encoder followed by a stack of
# deconv layers back up to a single-channel output
layers = []
nchan = 128
layers.append(Conv((2, 2, nchan), strides=2, **common))
for idx in range(16):
    layers.append(Conv((3, 3, nchan), **common))
    if nchan > 16:
        nchan //= 2  # halve the channel count (integer division)
for idx in range(15):
    layers.append(Deconv((3, 3, nchan), **common))
layers.append(Deconv((4, 4, nchan), strides=2, **common))
layers.append(Deconv((3, 3, 1), init=init, activation=Logistic(shortcut=True)))

cost = GeneralizedCost(costfunc=SumSquared())
mlp = Model(layers=layers)

callbacks = Callbacks(mlp, train, **args.callback_args)
evaluator = Evaluator(callbacks.callback_data, mlp, test, imwidth,
                      args.epochs, args.data_dir, point_num)
callbacks.add_callback(evaluator)
mlp.fit(train, optimizer=opt, num_epochs=args.epochs, cost=cost,
        callbacks=callbacks)
train.exit_batch_provider()