def ladder1(X, y, states, **kwargs):
    noise = kwargs.get('noise', 0.3)
    # hyperparameters that denote the importance of each layer
    denoising_cost = [1000.0, 10.0, 0.10, 0.10, 0.10]
    if states is None:
        # ====== encoder ====== #
        f_encoder = N.Sequence([
            N.Flatten(outdim=2),
            N.Dense(num_units=1024, b_init=None),
            N.BatchNorm(axes=0, noise_level=noise, noise_dims=None,
                        activation=K.relu),
            N.Dense(num_units=512, b_init=None),
            N.BatchNorm(axes=0, noise_level=noise, noise_dims=None,
                        activation=K.relu),
            N.Dense(num_units=256, b_init=None),
            N.BatchNorm(axes=0, noise_level=noise, noise_dims=None,
                        activation=K.relu),
            N.Dense(num_units=128, b_init=None),
            N.BatchNorm(axes=0, noise_level=noise, noise_dims=None,
                        activation=K.relu),
            N.Dense(num_units=10, activation=K.softmax),
        ], all_layers=True, debug=True, name='Encoder')
        # ====== decoder ====== #
        f_decoder = N.Sequence([
            N.Dense(num_units=128, b_init=None),
            N.BatchNorm(axes=0, activation=K.relu),
            N.Dense(num_units=256, b_init=None),
            N.BatchNorm(axes=0, activation=K.relu),
            N.Dense(num_units=512, b_init=None),
            N.BatchNorm(axes=0, activation=K.relu),
            N.Dense(num_units=1024, b_init=None),
            N.BatchNorm(axes=0, activation=K.relu),
            N.Reshape(shape=(-1, 28, 28)),
        ], all_layers=True, debug=True, name='Decoder')
    else:
        f_encoder, f_decoder = states
    # take every second output of the encoder (the post-BatchNorm activations)
    y_encoder_clean = f_encoder(X, noise=-1)[2::2]
    y_encoder_corrp = f_encoder(X, noise=1)[2::2]
    print(len(y_encoder_clean), len(y_encoder_corrp))
    exit()  # NOTE: debugging stop; the ladder denoising cost is not implemented yet
    return (None, None), [f_encoder, f_decoder]
def feedforward_vae(X, X1, f):
    if f is None:
        f = N.Sequence([
            N.Dense(num_units=10, activation=K.softmax),
            N.Dropout(level=0.5)
        ])
    return f(X), f
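# --- usage sketch (illustrative addition, not part of the original source):
# the build-or-reuse idiom above constructs the network on the first call and
# shares its parameters afterwards, e.g.
#
#   y1, f = feedforward_vae(X, X1, f=None)  # first call builds `f`
#   y2, f = feedforward_vae(X, X1, f=f)     # later calls reuse the same weights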
def test_seq(self):
    X = K.placeholder((None, 28, 28, 1))
    f = N.Sequence([
        N.Conv(8, (3, 3), strides=1, pad='same'),
        N.Dimshuffle(pattern=(0, 3, 1, 2)),
        N.Flatten(outdim=2),
        N.Noise(level=0.3, noise_dims=None, noise_type='gaussian'),
        N.Dense(128, activation=tf.nn.relu),
        N.Dropout(level=0.3, noise_dims=None),
        N.Dense(10, activation=tf.nn.softmax)
    ])
    y = f(X)
    yT = f.T(y)
    f1 = K.function(X, y, defaults={K.is_training(): True})
    f2 = K.function(X, yT, defaults={K.is_training(): False})

    f = cPickle.loads(cPickle.dumps(f))
    y = f(X)
    yT = f.T(y)
    f3 = K.function(X, y, defaults={K.is_training(): True})
    f4 = K.function(X, yT, defaults={K.is_training(): False})

    x = np.random.rand(12, 28, 28, 1)

    self.assertEqual(f1(x).shape, (2688, 10))
    self.assertEqual(f3(x).shape, (2688, 10))
    self.assertEqual(np.round(f1(x).sum(), 4), np.round(f3(x).sum(), 4))
    # `shape.as_list()` returns a list, so compare against a list, not a tuple
    self.assertEqual(y.shape.as_list(), [None, 10])

    self.assertEqual(f2(x).shape, (12, 28, 28, 1))
    self.assertEqual(f4(x).shape, (12, 28, 28, 1))
    self.assertEqual(str(f2(x).sum())[:4], str(f4(x).sum())[:4])
    self.assertEqual(yT.shape.as_list(), [None, 28, 28, 1])
def test_computational_graph3(self):
    # validate the number of updates found by ComputationGraph
    X = K.placeholder(shape=(None, 28, 28, 3))
    f = N.Sequence([
        N.Conv(32, 3, pad='same', activation=K.linear),
        N.BatchNorm(activation=K.relu),
        N.Flatten(outdim=2),
        N.Dense(16),
        N.BatchNorm(),
        N.Dense(10)
    ])
    K.set_training(True)
    y_train = f(X)
    K.set_training(False)
    y_score = f(X)
    self.assertTrue(K.get_shape(y_train) == K.get_shape(y_score) and
                    K.get_shape(y_score) == (None, 10))
    cc_train = K.ComputationGraph(y_train)
    cc_score = K.ComputationGraph(y_score)
    self.assertTrue(len(cc_score.updates) == 0)
    # 2 BatchNorm layers, each updating its moving mean and moving variance
    self.assertTrue(len(cc_train.updates) == 4)
    # create real functions
    fn_train = K.function(X, y_train)
    fn_score = K.function(X, y_score)
    shape1 = fn_train(np.random.rand(12, 28, 28, 3)).shape
    shape2 = fn_score(np.random.rand(12, 28, 28, 3)).shape
    self.assertTrue(shape1 == shape2 and shape1 == (12, 10))
def cnn(X, y):
    nb_classes = y.shape.as_list()[-1]
    # args_scope sets the defaults: Conv -> (b_init=None, activation=K.linear),
    # BatchNorm -> activation=K.relu; individual layers may still override them.
    with N.args_scope(['Conv', dict(b_init=None, activation=K.linear)],
                      ['BatchNorm', dict(activation=K.relu)]):
        f = N.Sequence([
            N.Dimshuffle(pattern=(0, 2, 3, 1)),
            N.Conv(32, (3, 3), pad='same', stride=(1, 1)),
            N.BatchNorm(),
            N.Conv(32, (3, 3), pad='same', stride=(1, 1),
                   b_init=0, activation=K.relu),
            N.Pool(pool_size=(2, 2), strides=None, mode='max'),
            N.Dropout(level=0.25),
            #
            N.Conv(64, (3, 3), pad='same', stride=(1, 1)),
            N.BatchNorm(),
            N.Conv(64, (3, 3), pad='same', stride=(1, 1),
                   b_init=0., activation=K.relu),
            N.Pool(pool_size=(2, 2), strides=None, mode='max'),
            N.Dropout(level=0.25),
            #
            N.Flatten(outdim=2),
            N.Dense(512, activation=K.relu),
            N.Dropout(level=0.5),
            N.Dense(nb_classes, activation=K.linear)
        ], debug=1)
    logit = f(X)
    prob = tf.nn.softmax(logit)
    return {'logit': logit, 'prob': prob}
def gender(X, f, **kwargs):
    nb_gender = kwargs.get('nb_gender', 4)
    if f is None:
        f = N.Sequence([
            N.Dimshuffle(pattern=(0, 1, 2, 'x')),
            N.Conv(num_filters=32, filter_size=3, strides=1,
                   b_init=None, pad='valid'),
            N.BatchNorm(activation=K.relu),
            N.Pool(pool_size=2, mode='avg'),
            N.Conv(num_filters=64, filter_size=3, strides=1,
                   b_init=None, pad='valid'),
            N.BatchNorm(activation=K.relu),
            N.Pool(pool_size=2, mode='avg'),
            N.Flatten(outdim=3),
            N.Dense(num_units=512, b_init=None),
            N.BatchNorm(axes=(0, 1)),
            N.AutoRNN(num_units=128, rnn_mode='gru', num_layers=2,
                      input_mode='linear', direction_mode='unidirectional'),
            N.Flatten(outdim=2),
            N.Dense(num_units=nb_gender, activation=K.softmax)
        ], debug=True)
    return f(X), f
def odin_net1():
    "FNN"
    f = N.Sequence([
        N.Dense(32, W_init=random(784, 32), b_init=zeros(32),
                activation=K.relu),
        N.Dense(16, W_init=random(32, 16), b_init=zeros(16),
                activation=K.softmax)
    ])
    return X2, f(X2)
def odin_net3():
    "RNN"
    W = [random(28, 32), random(32, 32), random(32), random_bin(12, 28)]
    f = N.Sequence([
        N.Dense(num_units=32, W_init=W[0], b_init=W[2], activation=K.linear),
        N.RNN(num_units=32, activation=K.relu, W_init=W[1])
    ])
    return X1, f(X1, hid_init=zeros(1, 32))
def test(X, y):
    nb_classes = y.shape.as_list()[-1]
    f = N.Sequence([
        N.Flatten(outdim=2),
        N.Dense(512, activation=K.relu),
        N.Dropout(level=0.5),
        N.Dense(nb_classes, activation=K.linear)
    ], debug=2)
    logit = f(X)
    prob = tf.nn.softmax(logit)
    return {'logit': logit, 'prob': prob}
def dense_creator():
    net = N.Sequence([
        N.Dense(int(args.hdim),
                b_init=0 if args.no_batchnorm else None,
                activation=K.relu if args.no_batchnorm else K.linear),
        None if args.no_batchnorm else N.BatchNorm(activation=K.relu)
    ], debug=True, name="DenseBatchNorm%d" % index[0])
    index[0] += 1
    return net
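# --- usage note (illustrative addition): since `index[0]` is incremented on
# every call, each invocation returns a fresh, uniquely named block with its
# own weights, as in the encoder/decoder construction later in this file:
#
#   f_encoder = N.Sequence([N.Flatten(outdim=2),
#                           dense_creator(),    # -> "DenseBatchNorm0"
#                           dense_creator(),    # -> "DenseBatchNorm1"
#                          ], debug=True, name='Encoder')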
def test_slice_ops(self):
    X = K.placeholder(shape=(None, 28, 28, 28, 3))
    f = N.Sequence([
        N.Conv(32, 3, pad='same', activation=K.linear),
        N.BatchNorm(activation=tf.nn.relu),
        N.Flatten(outdim=4)[:, 8:12, 18:25, 13:],
    ])
    y = f(X)
    fn = K.function(X, y)
    self.assertTrue(fn(np.random.rand(12, 28, 28, 28, 3)).shape[1:] ==
                    tuple(y.shape.as_list()[1:]))
    self.assertEqual(y.shape.as_list()[1:], [4, 7, 883])
def test_helper_ops_variables(self):
    X = K.placeholder(shape=(10, 20))
    f = N.Sequence([
        N.Dense(12),
        N.Dense(8),
        N.BatchNorm(),
        N.Dense(25, W_init=tf.zeros(shape=(8, 25)))
    ])
    y = f(X)
    self.assertEqual(y.shape.as_list(), [10, 25])
    self.assertEqual(len(f.variables), 10)
    self.assertEqual(len(f.parameters), 7)
    self.assertEqual(len(f.trainable_variables), 9)
def odin_net2():
    "CNN"
    f = N.Sequence([
        N.Dimshuffle((0, 1, 2, 'x')),
        N.Conv(12, (3, 3), strides=(1, 1), pad='same', untie_biases=False,
               W_init=random(3, 3, 1, 12), activation=K.relu),
        N.Pool(pool_size=(2, 2), strides=None, mode='max',
               ignore_border=True),
        N.Conv(16, (3, 3), strides=(1, 1), pad='same', untie_biases=False,
               W_init=random(3, 3, 12, 16), activation=K.sigmoid),
        N.Dimshuffle((0, 3, 1, 2))
    ])
    return X1, f(X1)
def test_computational_graph1(self):
    X = K.placeholder(shape=(None, 32), name='input')
    z = K.variable(np.random.rand(10, 10), name='z')
    f = N.Sequence([
        N.Dense(16, activation=K.relu),
        N.Dense(8, activation=K.softmax)
    ])
    y = f(X)
    add_auxiliary_variable(y, K.constant(10, name='aux_const'))

    tmp = K.ComputationGraph(y)
    self.assertEqual(len(tmp.placeholders), 1)
    self.assertEqual(len(tmp.trainable_variables), 4)
    self.assertEqual(len(tmp.parameters), 4)
    self.assertEqual(len(tmp.dict_of_placeholders), 1)
    self.assertEqual(len(tmp.auxiliary_variables), 1)
    tmp.intermediary_variables  # no idea how to test this
    self.assertEqual(len(tmp.updates), 1)
    self.assertEqual(K.ComputationGraph(y), tmp)
def test_load_save3(self):
    X = K.placeholder(shape=(None, 28, 28))
    ops = N.Sequence([
        N.Dimshuffle(pattern=(0, 1, 2, 'x')),
        N.Conv(8, (3, 3), strides=(1, 1), pad='same', activation=K.relu),
        K.pool2d,
        N.Flatten(outdim=2),
        N.Dense(64, activation=K.relu),
        N.Dense(10, activation=K.softmax)
    ])
    y = ops(X)
    f1 = K.function(X, y)

    ops_ = cPickle.loads(
        cPickle.dumps(ops, protocol=cPickle.HIGHEST_PROTOCOL))
    y_ = ops_(X)
    f2 = K.function(X, y_)

    x = np.random.rand(32, 28, 28)
    self.assertEqual(np.sum(f1(x) - f2(x)), 0.)
def create():
    f = N.Sequence([
        N.Conv(8, (3, 3), strides=1, pad='same'),
        N.Dimshuffle(pattern=(0, 3, 1, 2)),
        N.FlattenLeft(outdim=2),
        N.Noise(level=0.3, noise_dims=None, noise_type='gaussian'),
        N.Dense(128, activation=K.relu),
        N.Dropout(level=0.3, noise_dims=None),
        N.Dense(10, activation=K.softmax)
    ], debug=True)
    y = f(X)
    yT = f.T(y)
    f1 = K.function(X, y)
    f2 = K.function(X, yT)
    # pickle requires a binary file handle
    cPickle.dump(f, open(U.get_modelpath('dummy.ai', override=True), 'wb'))

    _ = f1(x)
    print(_.shape, _.sum())
    _ = f2(x)
    print(_.shape, _.sum())
# ===========================================================================
ds = fuel.load_cifar10()
print(ds)
X_train = K.placeholder(shape=(None,) + ds['X_train'].shape[1:],
                        name='X_train')
X_score = K.placeholder(shape=(None,) + ds['X_train'].shape[1:],
                        name='X_score')
y = K.placeholder(shape=(None,), name='y', dtype='int32')
# ===========================================================================
# Build network
# ===========================================================================
ops = N.Sequence([
    N.Flatten(outdim=2),
    N.Dense(512, activation=K.relu),
    N.Dense(256, activation=K.relu),
    N.Dense(10, activation=K.softmax)
])
ops = cPickle.loads(cPickle.dumps(ops))  # test if the ops is pickle-able

y_pred_train = ops(X_train)
y_pred_score = ops(X_score)
cost_train = K.mean(K.categorical_crossentropy(y_pred_train, y))
cost_test_1 = K.mean(K.categorical_crossentropy(y_pred_score, y))
cost_test_2 = K.mean(K.categorical_accuracy(y_pred_score, y))
cost_test_3 = K.confusion_matrix(y_pred_score, y, labels=range(10))
parameters = ops.parameters
optimizer = K.optimizers.RMSProp(lr=0.0001, clipnorm=100.)
updates = optimizer(cost_train, parameters)
print('Building training functions ...')
X = K.placeholder(shape=(None, MAX_SEQ_LEN), dtype='int32', name='X')
y = K.placeholder(shape=(None, nb_labels), dtype='float32', name='y')

f = N.Sequence([
    N.Embedding(tk.nb_words, embedding_dims, W_init=E),
    N.Dimshuffle(pattern=(0, 1, 'x', 2)),
    N.Conv(num_filters=128, filter_size=(5, 1), strides=1, pad='valid',
           activation=K.relu),
    N.Pool(pool_size=(5, 1), pad='valid', mode='max'),
    N.Conv(num_filters=128, filter_size=(5, 1), strides=1, pad='valid',
           activation=K.relu),
    N.Pool(pool_size=(5, 1), pad='valid', mode='max'),
    N.Conv(num_filters=128, filter_size=(5, 1), strides=1, pad='valid',
           activation=K.relu),
    N.Pool(pool_size=(35, 1), pad='valid', mode='max'),
    N.Flatten(outdim=2),
    N.Dense(num_units=128, activation=K.relu),
    N.Dense(num_units=nb_labels, activation=K.softmax)
], debug=True)
y_pred = f(X)
# exclude the embedding matrix from the list of parameters to be trained
params = [p for p in f.parameters if not has_roles(p, EmbeddingWeight)]
def test_lstm(self):
    W_in_to_ingate = random(28, 32) / 12
    W_hid_to_ingate = random(32, 32) / 12
    b_ingate = random(32) / 12
    W_in_to_forgetgate = random(28, 32) / 12
    W_hid_to_forgetgate = random(32, 32) / 12
    b_forgetgate = random(32) / 12
    W_in_to_cell = random(28, 32) / 12
    W_hid_to_cell = random(32, 32) / 12
    b_cell = random(32) / 12
    W_in_to_outgate = random(28, 32) / 12
    W_hid_to_outgate = random(32, 32) / 12
    b_outgate = random(32) / 12
    W_cell_to_ingate = random(32) / 12
    W_cell_to_forgetgate = random(32) / 12
    W_cell_to_outgate = random(32) / 12
    cell_init = random(1, 32) / 12
    hid_init = random(1, 32) / 12
    # ====== pre-define parameters ====== #
    x = random(12, 28, 28)
    x_mask = np.random.randint(0, 2, size=(12, 28))
    # x_mask = np.ones(shape=(12, 28))
    # ====== odin ====== #
    X = K.placeholder(shape=(None, 28, 28), name='X')
    mask = K.placeholder(shape=(None, 28), name='mask', dtype='int32')
    f = N.Sequence([
        N.Merge([
            N.Dense(32, W_init=W_in_to_ingate, b_init=b_ingate,
                    activation=K.linear),
            N.Dense(32, W_init=W_in_to_forgetgate, b_init=b_forgetgate,
                    activation=K.linear),
            N.Dense(32, W_init=W_in_to_cell, b_init=b_cell,
                    activation=K.linear),
            N.Dense(32, W_init=W_in_to_outgate, b_init=b_outgate,
                    activation=K.linear)
        ], merge_function=K.concatenate),
        N.LSTM(32, activation=K.tanh, gate_activation=K.sigmoid,
               W_hid_init=[W_hid_to_ingate, W_hid_to_forgetgate,
                           W_hid_to_cell, W_hid_to_outgate],
               W_peepholes=[W_cell_to_ingate, W_cell_to_forgetgate,
                            W_cell_to_outgate],
               input_mode='skip', name='lstm')
    ])
    y = f(X, h0=hid_init, c0=cell_init, mask=mask)
    f = K.function([X, mask], y)
    out1 = f(x, x_mask)
    # ====== lasagne ====== #
    if get_backend() == 'tensorflow':
        self.assertTrue(repr(np.sum(out1))[:4] == repr(43.652363)[:4])
        return
    l = lasagne.layers.InputLayer(shape=(None, 28, 28))
    l.input_var = X
    l_mask = lasagne.layers.InputLayer(shape=(None, 28))
    l_mask.input_var = mask
    l = lasagne.layers.LSTMLayer(
        l, num_units=32,
        ingate=lasagne.layers.Gate(
            nonlinearity=lasagne.nonlinearities.sigmoid,
            W_in=W_in_to_ingate,
            W_hid=W_hid_to_ingate,
            W_cell=W_cell_to_ingate,
            b=b_ingate),
        forgetgate=lasagne.layers.Gate(
            nonlinearity=lasagne.nonlinearities.sigmoid,
            W_in=W_in_to_forgetgate,
            W_hid=W_hid_to_forgetgate,
            W_cell=W_cell_to_forgetgate,
            b=b_forgetgate),
        cell=lasagne.layers.Gate(
            nonlinearity=lasagne.nonlinearities.tanh,
            W_in=W_in_to_cell,
            W_hid=W_hid_to_cell,
            W_cell=None,
            b=b_cell),
        outgate=lasagne.layers.Gate(
            nonlinearity=lasagne.nonlinearities.sigmoid,
            W_in=W_in_to_outgate,
            W_hid=W_hid_to_outgate,
            W_cell=W_cell_to_outgate,
            b=b_outgate),
        nonlinearity=lasagne.nonlinearities.tanh,
        cell_init=cell_init,
        hid_init=hid_init,
        mask_input=l_mask,
        precompute_input=True,
        backwards=False)
    y = lasagne.layers.get_output(l)
    f = K.function([X, mask], y)
    out2 = f(x, x_mask)
    # ====== test ====== #
    self.assertAlmostEqual(np.sum(np.abs(out1 - out2)), 0.)
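# --- reference equations (added for clarity, not part of the original test):
# the standard peephole LSTM that both implementations above compute, in the
# convention of lasagne.layers.LSTMLayer:
#   i_t = sigmoid(x_t W_xi + h_{t-1} W_hi + w_ci * c_{t-1} + b_i)
#   f_t = sigmoid(x_t W_xf + h_{t-1} W_hf + w_cf * c_{t-1} + b_f)
#   c_t = f_t * c_{t-1} + i_t * tanh(x_t W_xc + h_{t-1} W_hc + b_c)
#   o_t = sigmoid(x_t W_xo + h_{t-1} W_ho + w_co * c_t + b_o)
#   h_t = o_t * tanh(c_t)
# The N.Merge block precomputes the four input projections x_t W_x* + b_* and
# N.LSTM(..., input_mode='skip') consumes the concatenated result, mirroring
# lasagne's precompute_input=True.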
def test_gru(self):
    # ====== pre-define parameters ====== #
    W_in_to_updategate = random(28, 32)
    W_hid_to_updategate = random(32, 32)
    b_updategate = random(32)
    #
    W_in_to_resetgate = random(28, 32)
    W_hid_to_resetgate = random(32, 32)
    b_resetgate = random(32)
    #
    W_in_to_hidden_update = random(28, 32)
    W_hid_to_hidden_update = random(32, 32)
    b_hidden_update = random(32)
    #
    hid_init = random(1, 32)
    x = random(12, 28, 28)
    x_mask = np.random.randint(0, 2, size=(12, 28))
    # ====== odin ====== #
    X = K.placeholder(shape=(None, 28, 28), name='X')
    mask = K.placeholder(shape=(None, 28), name='mask', dtype='int32')
    f = N.Sequence([
        N.Merge([
            N.Dense(32, W_init=W_in_to_updategate, b_init=b_updategate,
                    activation=K.linear, name='update'),
            N.Dense(32, W_init=W_in_to_resetgate, b_init=b_resetgate,
                    activation=K.linear, name='reset'),
            N.Dense(32, W_init=W_in_to_hidden_update, b_init=b_hidden_update,
                    activation=K.linear, name='hidden')
        ], merge_function=K.concatenate),
        N.GRU(32, activation=K.tanh, gate_activation=K.sigmoid,
              W_hid_init=[W_hid_to_updategate, W_hid_to_resetgate,
                          W_hid_to_hidden_update],
              input_mode='skip')
    ])
    y = f(X, h0=hid_init, mask=mask)
    f = K.function([X, mask], y)
    out1 = f(x, x_mask)
    # ====== lasagne ====== #
    if get_backend() == 'tensorflow':
        self.assertTrue(repr(np.sum(out1))[:8] == repr(2490.0596)[:8])
        return
    l = lasagne.layers.InputLayer(shape=(None, 28, 28))
    l.input_var = X
    l_mask = lasagne.layers.InputLayer(shape=(None, 28))
    l_mask.input_var = mask
    l = lasagne.layers.GRULayer(
        l, num_units=32,
        updategate=lasagne.layers.Gate(
            W_cell=None,
            W_in=W_in_to_updategate,
            W_hid=W_hid_to_updategate,
            b=b_updategate,
            nonlinearity=lasagne.nonlinearities.sigmoid),
        resetgate=lasagne.layers.Gate(
            W_cell=None,
            W_in=W_in_to_resetgate,
            W_hid=W_hid_to_resetgate,
            b=b_resetgate,
            nonlinearity=lasagne.nonlinearities.sigmoid),
        hidden_update=lasagne.layers.Gate(
            W_cell=None,
            W_in=W_in_to_hidden_update,
            W_hid=W_hid_to_hidden_update,
            b=b_hidden_update,
            nonlinearity=lasagne.nonlinearities.tanh),
        hid_init=hid_init,
        mask_input=l_mask,
        precompute_input=True)
    y = lasagne.layers.get_output(l)
    f = K.function([X, mask], y)
    out2 = f(x, x_mask)
    # ====== test ====== #
    self.assertAlmostEqual(np.sum(np.abs(out1 - out2)), 0.)
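# --- reference equations (added for clarity, not part of the original test):
# the GRU both implementations compute, in the convention of
# lasagne.layers.GRULayer:
#   u_t = sigmoid(x_t W_xu + h_{t-1} W_hu + b_u)        # update gate
#   r_t = sigmoid(x_t W_xr + h_{t-1} W_hr + b_r)        # reset gate
#   c_t = tanh(x_t W_xc + r_t * (h_{t-1} W_hc) + b_c)   # candidate state
#   h_t = (1 - u_t) * h_{t-1} + u_t * c_t
# Again, N.Merge precomputes the three input projections and
# N.GRU(..., input_mode='skip') consumes the concatenation.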
TRAIN_MODEL = True
with N.args_scope(
        ['TimeDelayedConv', dict(time_pool='none', activation=K.relu)],
        ['Dense', dict(activation=K.linear, b_init=None)],
        ['BatchNorm', dict(activation=K.relu)]):
    x_vec = N.Sequence([
        N.Dropout(level=0.3),
        N.TimeDelayedConv(n_new_features=512, n_time_context=5),
        N.TimeDelayedConv(n_new_features=512, n_time_context=5),
        N.TimeDelayedConv(n_new_features=512, n_time_context=7),
        N.Dense(512), N.BatchNorm(),
        N.Dense(1500), N.BatchNorm(),
        N.StatsPool(axes=1, output_mode='concat'),
        N.Flatten(outdim=2),
        N.Dense(512, name="LatentOutput"), N.BatchNorm(),
        N.Dense(512), N.BatchNorm(),
        N.Dense(n_speakers, activation=K.linear,
                b_init=init_ops.constant_initializer(value=0))
    ], debug=1)
# ====== create outputs ====== #
y_logit = x_vec(X)
y_proba = tf.nn.softmax(y_logit)
z = K.ComputationGraph(y_proba).get(roles=N.Dense, scope='LatentOutput',
                                    beginning_scope=False)[0]
# ====== create the networks ====== #
with N.args_scope(
        ['TimeDelayedConv', dict(time_pool='none', activation=K.relu)],
        ['Dense', dict(activation=K.linear, b_init=None)]):
    f = N.Sequence([
        N.Dropout(level=0.3),
        N.TimeDelayedConv(n_new_features=512, n_time_context=5),
        N.TimeDelayedConv(n_new_features=512, n_time_context=5),
        N.TimeDelayedConv(n_new_features=512, n_time_context=7,
                          name="LatentTDNN"),
        N.Dense(512), N.BatchNorm(activation=K.relu),
        N.Dense(1500), N.BatchNorm(activation=K.relu),
        N.StatsPool(axes=1, output_mode='concat'),
        N.Flatten(outdim=2, name="StatsPooling"),
        N.Dense(512, name="LatentDense"), N.BatchNorm(activation=K.relu),
        N.Dense(512), N.BatchNorm(activation=K.relu),
        N.Dense(num_units=n_classes, activation=K.linear,
                b_init=init_ops.constant_initializer(0))
    ], debug=1)
# ====== create outputs ====== #
y_logit = f(X)
y_proba = tf.nn.softmax(y_logit)
z1 = K.ComputationGraph(y_proba).get(roles=N.Dense, scope='LatentDense',
                                     beginning_scope=False)[0]
f = N.Sequence([
    N.Dimshuffle(pattern=(0, 1, 2, 'x')),
    N.Conv(num_filters=32, filter_size=3, pad='same', strides=1,
           activation=K.linear),
    N.BatchNorm(activation=K.relu),
    N.Conv(num_filters=64, filter_size=3, pad='same', strides=1,
           activation=K.linear),
    N.BatchNorm(activation=K.relu),
    N.Pool(pool_size=2, strides=None, pad='valid', mode='max'),
    N.Flatten(outdim=3),
    # ====== RNN ====== #
    N.AutoRNN(128, rnn_mode='lstm', num_layers=3,
              direction_mode='bidirectional', prefer_cudnn=True),
    # ====== Dense ====== #
    N.Flatten(outdim=2),
    # N.Dropout(level=0.2),  # adding dropout does not help
    N.Dense(num_units=1024, activation=K.relu),
    N.Dense(num_units=512, activation=K.relu),
    N.Dense(num_units=nb_classes, activation=K.softmax)
], debug=True)
        N.Dense(int(args.hdim),
                b_init=0 if args.no_batchnorm else None,
                activation=K.relu if args.no_batchnorm else K.linear),
        None if args.no_batchnorm else N.BatchNorm(activation=K.relu)
    ], debug=True, name="DenseBatchNorm%d" % index[0])
    index[0] += 1
    return net


f_encoder = N.Sequence([
    N.Flatten(outdim=2),
    N.Dropout(level=args.xdrop) if args.xdrop > 0 else None,
    dense_creator(),
    dense_creator(),
    N.Dropout(level=args.edrop) if args.edrop > 0 else None,
], debug=True, name='Encoder')
f_decoder = N.Sequence([
    N.Dropout(level=args.zdrop) if args.zdrop > 0 else None,
    dense_creator(),
    dense_creator(),
    N.Dropout(level=args.ddrop) if args.ddrop > 0 else None,
], debug=True, name='Decoder')
# ===========================================================================
# Create statistical model
y = inputs[1]
print("Inputs:", ctext(inputs, 'cyan'))
# ====== create the networks ====== #
with N.args_scope([('Conv', 'Dense'),
                   dict(b_init=None, activation=K.linear, pad='same')],
                  ['BatchNorm', dict(activation=K.relu)]):
    f = N.Sequence([
        N.Dimshuffle(pattern=(0, 1, 2, 'x')),
        N.Conv(num_filters=32, filter_size=(9, 7)), N.BatchNorm(),
        N.Pool(pool_size=(3, 2), strides=2),
        N.Conv(num_filters=64, filter_size=(5, 3)), N.BatchNorm(),
        N.Pool(pool_size=(3, 1), strides=(2, 1), name='PoolOutput1'),
        N.Conv(num_filters=64, filter_size=(5, 3)), N.BatchNorm(),
        N.Pool(pool_size=(3, 2), strides=(2, 2), name='PoolOutput2'),
        N.Flatten(outdim=2),
        N.Dense(512, name="LatentDense"), N.BatchNorm(),
        N.Dense(512), N.BatchNorm(),
        N.Dense(n_classes)
    ], debug=1)
# ====== create outputs ====== #
y_logit = f(X)
y_proba = tf.nn.softmax(y_logit)
z1 = K.ComputationGraph(y_proba).get(roles=N.Pool, scope='PoolOutput1',
                                     beginning_scope=False)[0]
test.set_recipes(recipes)
# ===========================================================================
# Create model
# ===========================================================================
inputs = [K.placeholder(shape=(None,) + shape[1:],
                        dtype='float32',
                        name='input%d' % i)
          for i, shape in enumerate(train.shape)]
print("Inputs:", ctext(inputs, 'cyan'))
# ====== create the network ====== #
f_encoder = N.Sequence([
    N.Dimshuffle(pattern=(0, 1, 2, 'x')),
    N.Conv(num_filters=32, filter_size=(7, 7), b_init=None,
           activation=K.linear),
    N.BatchNorm(),
    N.Pool(pool_size=(3, 2), strides=2),
], debug=True, name='Encoder')
f_latent = N.Sequence([
    N.Flatten(outdim=3),
    N.CudnnRNN(num_units=128, num_layers=1, is_bidirectional=False,
               rnn_mode='lstm'),
], debug=True, name='Latent')
f_decoder = N.Sequence([
    N.Flatten(outdim=2),
    N.Dense(num_units=1024, b_init=None, activation=K.linear),
    N.BatchNorm(axes=0, activation=K.relu)
y_test = ds['y_test']
# ===========================================================================
# Create network
# ===========================================================================
X = K.placeholder(shape=(None,) + X_learn.shape[1:], name='X')
y_true = K.placeholder(shape=(None,), name='y_true', dtype='int32')
f = N.Sequence([
    N.Dimshuffle(pattern=(0, 2, 3, 1)),
    N.Conv(32, (3, 3), pad='same', stride=(1, 1), activation=K.relu),
    N.Conv(32, (3, 3), pad='same', stride=(1, 1), activation=K.relu),
    N.Pool(pool_size=(2, 2), ignore_border=True, strides=None, mode='max'),
    N.Dropout(level=0.25),
    N.Conv(64, (3, 3), pad='same', stride=(1, 1), activation=K.relu),
    N.Conv(64, (3, 3), pad='same', stride=(1, 1), activation=K.relu),
    N.Pool(pool_size=(2, 2), ignore_border=True, strides=None, mode='max'),
    N.Dropout(level=0.25),
    N.Flatten(outdim=2),
    N.Dense(512, activation=K.relu),
    N.Dropout(level=0.5),
    N.Dense(10, activation=K.softmax)
], debug=True)
K.set_training(True)
y_train = f(X)
K.set_training(False)
y_pred = f(X)

cost_train = K.mean(K.categorical_crossentropy(y_train, y_true))
cost_pred = K.mean(K.categorical_accuracy(y_pred, y_true))
cost_eval = K.mean(K.categorical_crossentropy(y_pred, y_true))
# ===========================================================================
f = N.Sequence([
    N.Embedding(max_features, embedding_size),
    N.Dropout(0.25),
    N.Dimshuffle(pattern=(0, 1, 'x', 2)),  # convolution on time dimension
    N.Conv(nb_filter, filter_size=(filter_length, 1), pad='valid',
           stride=(1, 1), activation=K.relu),
    N.Pool(pool_size=(pool_length, 1), mode='max'),
    N.Flatten(outdim=3),
    N.Merge([
        N.Dense(lstm_output_size, activation=K.linear,
                name='ingate'),  # input-gate
        N.Dense(lstm_output_size, activation=K.linear,
                name='forgetgate'),  # forget-gate
        N.Dense(lstm_output_size, activation=K.linear,
                name='cellupdate'),  # cell-update
        N.Dense(lstm_output_size, activation=K.linear,
                name='outgate')  # output-gate
    ], merge_function=K.concatenate),
    # the [:, -1] slice keeps only the hidden state of the last time step
    N.LSTM(num_units=lstm_output_size, input_mode='skip')[:, -1],
    N.Dense(1, activation=K.sigmoid)
], debug=True)
K.set_training(True)
# Create the network
# ===========================================================================
LATENT_DROPOUT = 0.3
if args.cnn:
    with N.args_scope(([N.Conv, N.Dense],
                       dict(b_init=None, activation=K.linear)),
                      (N.BatchNorm, dict(activation=tf.nn.elu)),
                      (N.Pool, dict(mode='max', pool_size=2))):
        f_encoder = N.Sequence([
            N.Dropout(level=0.5),
            N.Dimshuffle((0, 2, 3, 1)) if is_cifar10 else
            N.Dimshuffle((0, 1, 2, 'x')),
            N.Conv(num_filters=32, filter_size=3, pad='valid'),
            N.Pool(),
            N.BatchNorm(),
            N.Conv(num_filters=64, filter_size=3, pad='same'),
            N.BatchNorm(),
            N.Conv(num_filters=64, filter_size=3, pad='valid'),
            N.BatchNorm(activation=tf.nn.elu),
            N.Pool(),
            N.Flatten(outdim=2),
            N.Dense(num_units=args.dim)
        ], debug=True, name='EncoderNetwork')
        f_decoder = N.Sequence([
            N.Dropout(level=LATENT_DROPOUT, noise_type='uniform'),
            N.Noise(level=1.0, noise_type='gaussian'),
            N.Dimshuffle((0, 'x', 'x', 1)),
            N.TransposeConv(num_filters=64, filter_size=3, pad='valid'),
    ds = fuel.load_mnist()
else:
    ds = fuel.load_cifar10()
X = K.placeholder(shape=(None,) + ds['X_train'].shape[1:], name='X')
y = K.placeholder(shape=(None,), name='y', dtype='int32')
# ===========================================================================
# Build network
# ===========================================================================
ops = N.Sequence([
    N.Dimshuffle((0, 1, 2, 'x')) if USE_MNIST_DATA else None,
    N.BatchNorm(axes='auto'),
    N.Conv(32, (3, 3), strides=(1, 1), pad='same', activation=K.relu),
    N.Pool(pool_size=(2, 2), strides=None),
    N.Conv(64, (3, 3), strides=(1, 1), pad='same', activation=K.relu),
    N.Pool(pool_size=(2, 2), strides=None),
    N.Flatten(outdim=2),
    N.Dense(256, activation=K.relu),
    N.Dense(10, activation=K.softmax)
], debug=True)
ops = cPickle.loads(cPickle.dumps(ops))  # test if the ops is pickle-able

K.set_training(True)
y_pred_train = ops(X)
K.set_training(False)
y_pred_score = ops(X)

cost_train = K.mean(K.categorical_crossentropy(y_pred_train, y))
cost_test_1 = K.mean(K.categorical_crossentropy(y_pred_score, y))
cost_test_2 = K.mean(K.categorical_accuracy(y_pred_score, y))