def test_nested_sequential():
    input1 = Input(1)
    in_seq = Sequential([dense(**params), Activation('linear')])
    output = Sequential(
        [in_seq, dense(initial_weights=[W2, b2]), Activation('linear')])(input1)
    feed_test(input1, output, wpb2, 4)
def test_shared_nested_sequential():
    input1 = Input(1)
    input2 = Input(1)
    in_seq = Sequential([dense(**params), Activation('linear')])
    seq = Sequential(
        [in_seq, dense(initial_weights=[W2, b2]), Activation('linear')])
    output = ElementWiseSum()([seq(input1), seq(input2)])
    feed_test([input1, input2], output, 2 * wpb2, 4)
def test_trainable():
    def not_trainable(model):
        # with frozen weights, the loss stays identical across epochs
        history = model.fit([[1]], 3 * wpb, nb_epoch=2)
        return history[0]['loss'] == history[1]['loss']

    m1 = create_model(dense(trainable=False))
    m2 = create_model(Sequential([dense()], trainable=False))
    m3 = create_model(Sequential([Sequential([dense()])], trainable=False))
    assert not_trainable(m1)
    assert not_trainable(m2)
    assert not_trainable(m3)
def test_initial_weights():
    m1 = create_model(dense(initial_weights={'W': W, 'b': b}))
    m2 = create_model(dense(initial_weights={'W': W}))
    m3 = create_model(dense(initial_weights=[W, b]))
    m4 = create_model(dense(initial_weights=[W]))
    m5 = create_model(dense(initial_weights=W))
    m6 = create_model(dense(initial_weights=W.tolist()))
    m7 = create_model(Sequential([dense(initial_weights=[W, b])]))
    m8 = create_model(Sequential([Sequential([dense(initial_weights=[W, b])])]))
    assert (m1.predict([[1]]) == wpb).all()
    assert (m2.predict([[1]]) == W).all()
    assert (m3.predict([[1]]) == wpb).all()
    assert (m4.predict([[1]]) == W).all()
    assert (m5.predict([[1]]) == W).all()
    assert (m6.predict([[1]]) == W).all()
    assert (m7.predict([[1]]) == wpb).all()
    assert (m8.predict([[1]]) == wpb).all()
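# Note on the formats exercised above: initial_weights accepts a dict keyed by
# parameter name, a (possibly partial) list ordered as [W, b], or a bare
# array/nested list for the first parameter. Unspecified parameters keep the
# layer's default initialization; here b defaults to zeros, which is why
# m2 and m4-m6 predict W alone.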
def test_feed_exceptions():
    # Forget to feed d1
    with pytest.raises(KError):
        d1 = Dense(1)
        Dense(1)(d1)

    # First layer of a Sequential should be an Input
    with pytest.raises(KError):
        s1 = Sequential([Dense(1)])
        s1.compile('sgd', 'mse')

    # Recursive feeding
    with pytest.raises(KError):
        input1 = Input(1)
        d = Dense(1)
        d1 = d(input1)
        d(d1)

    # Recursive feeding
    with pytest.raises(KError):
        i1 = Input(1)
        i2 = Input(1)
        i3 = Input(1)
        i4 = Input(1)
        m = ElementWiseSum()
        m1 = m([i1, i2])
        m2 = m([i3, i4])
        m([m1, m2])  # m's output feeds into m again

    # shape should be assigned as a tuple, i.e. Input((1, 2))
    with pytest.raises(KError):
        input1 = Input(1, 2)

    # You should not feed an Input layer
    with pytest.raises(KError):
        input1 = Input(1)(Input(1))
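# For contrast, a minimal well-formed feeding pattern (a sketch): every layer
# is fed a tensor that ultimately derives from an Input, and no layer's output
# is ever fed back into the layer that produced it.
input1 = Input(1)
hidden = Dense(1)(input1)
output = Dense(1)(hidden)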
def test_check_input_shape():
    # Classes inheriting Layer do not allow multiple inputs
    with pytest.raises(KError):
        Sequential(Dense(1)([Input(1), Input(1)]))

    # Input dimension mismatch
    with pytest.raises(KError):
        input1 = Input((1, 1, 1))
        Dense(1)(input1)

    # A multiple-input layer by default accepts only equal-shape inputs
    # (hypothetical example: unequal input shapes should be rejected)
    with pytest.raises(KError):
        ElementWiseSum()([Input(1), Input((1, 2))])
def test_regularizers():
    l = 0.01
    W_l1 = l * np.sum(abs(W))
    wpb_l1 = l * np.sum(abs(wpb))
    m1 = create_model(dense(initial_weights=[W, b]))
    m2 = create_model(
        dense(initial_weights=[W, b], regularizers={'W': L1(l), 'b': L1(l)}))
    m3 = create_model(dense(initial_weights=[W, b], regularizers={'W': L1(l)}))
    m4 = create_model(
        dense(initial_weights=[W, b], regularizers=[L1(l), L1(l)]))
    m5 = create_model(dense(initial_weights=[W, b], regularizers=[L1(l)]))
    m6 = create_model(
        Sequential(
            [dense(initial_weights=[W, b], regularizers=[L1(l), L1(l)])]))
    m7 = create_model(
        Sequential([
            Sequential(
                [dense(initial_weights=[W, b], regularizers=[L1(l), L1(l)])])
        ]))

    def eval_model(m, train_mode=True):
        # output - expected = regularizer loss
        return m.evaluate([[1]], [wpb], train_mode=train_mode)

    assert eval_model(m1) == eval_model(m2, train_mode=False)
    assert_allclose(eval_model(m2), wpb_l1)
    assert_allclose(eval_model(m3), W_l1)
    assert_allclose(eval_model(m4), wpb_l1)
    assert_allclose(eval_model(m5), W_l1)
    assert_allclose(eval_model(m6), wpb_l1)
    assert_allclose(eval_model(m7), wpb_l1)
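# Why wpb_l1 works as the expected value above: the model's prediction equals
# the target wpb, so the MSE term is zero and evaluate() returns only the L1
# penalty l*(sum|W| + sum|b|). A hand check (a sketch, assuming the
# module-level W = [[1, 2]] and b = [3, 4] used elsewhere in these tests):
import numpy as np

l = 0.01
W_check = np.array([[1, 2]])
b_check = np.array([3, 4])
penalty = l * (np.sum(np.abs(W_check)) + np.sum(np.abs(b_check)))
# all entries are positive, so the penalty coincides with l * sum|W + b|
assert np.isclose(penalty, l * np.sum(np.abs(W_check + b_check)))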
def test_wrc_exceptions():
    # Sequential should be initialized with a list of layers
    with pytest.raises(KError):
        Sequential(Dense(2))

    # Layer weight shape mismatch
    with pytest.raises(KError):
        create_model(initial_weights={'W': np.expand_dims(W, axis=1), 'b': b})

    # regularizers does not take a single bare input
    with pytest.raises(KError):
        create_model(initial_weights=[W, b], regularizers='l1')

    # constraints does not take a single bare input
    with pytest.raises(KError):
        create_model(initial_weights=[W, b], constraints='maxnorm')
def test_sgd():
    '''
    Math: let W = [A, B], b = [C, D], y = [E, F].
    MSE = 1/2*[(A+C-E)^2 + (B+D-F)^2]
    dA, dB, dC, dD = (A+C-E), (B+D-F), (A+C-E), (B+D-F)
    Assume E = 2*(A+C), F = 2*(B+D):
    dA, dB, dC, dD = -(A+C), -(B+D), -(A+C), -(B+D)
    A -= lr*dA, B -= lr*dB, C -= lr*dC, D -= lr*dD
    '''
    lr = 0.01
    W = np.array([[1, 2]])
    b = np.array([3, 4])
    wpb = W + b
    model = Sequential([Input(1), Dense(2, initial_weights=[W, b])])
    optimizer = SGD(lr=lr)
    model.compile(optimizer, 'mse')
    model.fit([1], 2 * wpb, nb_epoch=1)
    expectedW = W + lr * wpb
    expectedb = (b + lr * wpb).reshape((2,))
    assert_allclose(B.eval(model.layers[1].W), expectedW)
    assert_allclose(B.eval(model.layers[1].b), expectedb)
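# A plain-NumPy replay of the docstring math (a sketch; it mirrors the single
# SGD step that test_sgd performs, with the same lr, W, b, and target 2*wpb):
import numpy as np

lr = 0.01
W_ = np.array([[1., 2.]])
b_ = np.array([3., 4.])
y_pred = W_[0] + b_            # forward pass for input x = 1
y_true = 2 * (W_[0] + b_)      # target chosen so the gradient is -(W + b)
grad = y_pred - y_true         # per the docstring, dW == db == pred - target
W_new = W_ - lr * grad
b_new = b_ - lr * grad
assert np.allclose(W_new, W_ + lr * (W_ + b_))    # expectedW = W + lr*wpb
assert np.allclose(b_new, b_ + lr * (W_[0] + b_))  # expectedb = b + lr*wpb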
X_train = X_train.reshape(X_train.shape[0], 1, img_rows, img_cols)
X_test = X_test.reshape(X_test.shape[0], 1, img_rows, img_cols)
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')
X_train /= 255
X_test /= 255
print('X_train shape:', X_train.shape)
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')

# convert class vectors to binary class matrices
Y_train = np_utils.to_categorical(y_train, nb_classes)
Y_test = np_utils.to_categorical(y_test, nb_classes)

model = Sequential()
model.add(Input((1, img_rows, img_cols)))
model.add(Convolution2D(nb_kernel, kernel_size[0], kernel_size[1], pad='valid'))
model.add(Activation('relu'))
model.add(Convolution2D(nb_kernel, kernel_size[0], kernel_size[1]))
model.add(Activation('relu'))
model.add(Pooling2D('max', pool_size=(nb_pool, nb_pool)))
model.add(Dropout(0.25))

model.add(Flatten())
model.add(Dense(128))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(nb_classes))
def test_shared_sequential():
    input1 = Input(1)
    input2 = Input(1)
    shared = Sequential([dense(**params), dense(initial_weights=[W2, b2])])
    output = ElementWiseSum()([shared(input1), shared(input2)])
    feed_test([input1, input2], output, 2 * wpb2, 4)
(X_train, y_train), (X_test, y_test) = mnist.load_data()

X_train = X_train.reshape(60000, 784)
X_test = X_test.reshape(10000, 784)
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')
X_train /= 255
X_test /= 255
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')

# convert class vectors to binary class matrices
y_train = np_utils.to_categorical(y_train, nb_classes)
y_test = np_utils.to_categorical(y_test, nb_classes)

model = Sequential()
model.add(Input(784))
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dropout(0.2))
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dropout(0.2))
model.add(Dense(10))
model.add(Activation('softmax'))

model.compile(loss='categorical_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])

history = model.fit(X_train, y_train,
def test_sequential_multi_input():
    input1 = Input(1)
    input2 = Input(1)
    output = Sequential([ElementWiseSum(), dense(**params)])([input1, input2])
    feed_test([input1, input2], output, 2 * W + b, 2)
def test_sequential_as_input():
    seq = Sequential([Input(1), dense(**params)])
    output = dense(initial_weights=[W2, b2])(seq)
    feed_test(seq, output, wpb2, 4)
def test_sequential_layer():
    input1 = Input(1)
    output = Sequential([dense(**params),
                         dense(initial_weights=[W2, b2])])(input1)
    feed_test(input1, output, wpb2, 4)
def test_TimeDistributed():
    W = np.array([[1, 2]])
    b = np.array([3, 4])
    dense = core.Dense(2, initial_weights=[W, b])
    exp_output = []
    for o_slice in origin:
        exp_output.append(np.dot([o_slice], W) + b)
    exp_output = np.concatenate(exp_output, axis=0)
    layer_test(wrappers.TimeDistributed(dense), origin, exp_output)

    # test undetermined input length
    model = Sequential()
    model.add(Input(None, dtype='int32'))
    model.add(Embedding(4, 1, initial_weights=origin.reshape(-1, 1)))
    model.add(wrappers.TimeDistributed(dense))
    model.compile('sgd', 'mse')
    assert_allclose(model.predict([[0, 1], [2, 3]]), exp_output)
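# TimeDistributed applies the wrapped layer to every timestep independently;
# for a Dense layer this is the per-slice affine map the exp_output loop
# computes by hand: out[t] = x[t] @ W + b for each timestep t.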
num_predictions = 20
save_dir = os.path.join(os.getcwd(), 'saved_models')
model_name = 'keraflow_cifar10_trained_model.json'
weight_name = 'keraflow_cifar10_trained_weights.hkl'

# The data, shuffled and split between train and test sets:
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# Convert class vectors to binary class matrices.
y_train = keraflow.utils.to_categorical(y_train, num_classes)
y_test = keraflow.utils.to_categorical(y_test, num_classes)

model = Sequential()
model.add(Input(x_train.shape[1:]))
model.add(Convolution2D(32, 3, 3, pad='same'))
model.add(Activation('relu'))
model.add(Convolution2D(32, 3, 3))
model.add(Activation('relu'))
model.add(Pooling2D('max', pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Convolution2D(64, 3, 3, pad='same'))
model.add(Activation('relu'))
model.add(Convolution2D(64, 3, 3))
model.add(Activation('relu'))
model.add(Pooling2D('max', pool_size=(2, 2)))
model.add(Dropout(0.25))
def test_simplernn():
    def simplernn(W, U):
        def call(xt, ytm1):
            h = np.dot(xt, W)
            yt = h + np.dot(ytm1, U)
            return yt, [yt]
        return call

    W = np.ones((input_dim, output_dim))
    U = np.ones((output_dim, output_dim))
    run_test(recurrent.SimpleRNN,
             simplernn(W, U),
             num_states=1,
             activation='linear',
             initial_weights=[W, U])

    # test mask
    # we only test mask on SimpleRNN since the mask implementation does not
    # depend on the particular RNN
    from keraflow.models import Sequential
    from keraflow.layers import Input, Embedding
    vocab_size = origin.shape[0]
    emb_dim = origin.shape[1]
    if B.name() == 'tensorflow':
        input_length = origin.shape[0]
    else:
        input_length = None

    model = Sequential([])
    model.add(Input(input_length, mask_value=1))
    model.add(Embedding(vocab_size, emb_dim, initial_weights=origin))
    model.add(recurrent.SimpleRNN(output_dim,
                                  initial_weights=[W, U],
                                  activation='linear'))
    model.compile('sgd', 'mse')
    exp_output = rnn([origin[:1]], simplernn(W, U), output_dim, num_states=1)
    assert_allclose(exp_output, model.predict([[0, 1]]))
    if input_length is None:
        assert_allclose(exp_output, model.predict([[0]]))
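# An unrolled view of the recurrence encoded by simplernn(W, U) above
# (a sketch, assuming a zero initial state and linear activation as in
# the test): y_t = x_t @ W + y_{t-1} @ U
import numpy as np

def unroll(X, W, U):
    # X: (timesteps, input_dim); returns (timesteps, output_dim)
    y = np.zeros(U.shape[0])
    outputs = []
    for x_t in X:
        y = np.dot(x_t, W) + np.dot(y, U)
        outputs.append(y)
    return np.array(outputs)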
def create_model(layer):
    model = Sequential([Input(1), layer])
    model.compile('sgd', 'mse')
    return model
print('Pad sequences (samples x time)')
X_train = pad_sequences(X_train, maxlen=maxlen)
X_test = pad_sequences(X_test, maxlen=maxlen)
# import numpy as np
# X_train = np.concatenate((X_train[:100], X_train[-100:]), axis=0)
# y_train = np.concatenate((y_train[:100], y_train[-100:]), axis=0)
# X_test = np.concatenate((X_test[:10], X_test[-10:]), axis=0)
# y_test = np.concatenate((y_test[:10], y_test[-10:]), axis=0)
print('X_train shape:', X_train.shape)
print('X_test shape:', X_test.shape)

print('Build model...')
model = Sequential()
model.add(Input(maxlen))

# we start off with an efficient embedding layer which maps
# our vocab indices into embedding_dims dimensions
model.add(Embedding(max_features, embedding_dims, dropout=0.2))

# we add a Convolution1D, which will learn nb_kernel
# word group filters of size kernel_row:
model.add(Convolution1D(nb_kernel=nb_kernel,
                        kernel_row=kernel_row,
                        pad='valid',
                        activation='relu',
                        stride=1))
def test_constraints():
    maxnorm = 2
    m1 = create_model(dense(initial_weights=[W, b]))
    m2 = create_model(
        dense(initial_weights=[W, b],
              constraints={'W': MaxNorm(m=maxnorm, axis=1),
                           'b': MaxNorm(m=maxnorm, axis=0)}))
    m3 = create_model(
        dense(initial_weights=[W, b],
              constraints={'W': MaxNorm(m=maxnorm, axis=1)}))
    m4 = create_model(
        dense(initial_weights=[W, b],
              constraints=[MaxNorm(m=maxnorm, axis=1),
                           MaxNorm(m=maxnorm, axis=0)]))
    m5 = create_model(
        dense(initial_weights=[W, b],
              constraints=[MaxNorm(m=maxnorm, axis=1)]))
    m6 = create_model(
        Sequential([
            dense(initial_weights=[W, b],
                  constraints=[MaxNorm(m=maxnorm, axis=1),
                               MaxNorm(m=maxnorm, axis=0)])
        ]))
    m7 = create_model(
        Sequential([
            Sequential([
                dense(initial_weights=[W, b],
                      constraints=[MaxNorm(m=maxnorm, axis=1),
                                   MaxNorm(m=maxnorm, axis=0)])
            ])
        ]))

    m1.fit([[1]], 5 * wpb, nb_epoch=1)
    m2.fit([[1]], 5 * wpb, nb_epoch=1)
    m3.fit([[1]], 5 * wpb, nb_epoch=1)
    m4.fit([[1]], 5 * wpb, nb_epoch=1)
    m5.fit([[1]], 5 * wpb, nb_epoch=1)
    m6.fit([[1]], 5 * wpb, nb_epoch=1)
    m7.fit([[1]], 5 * wpb, nb_epoch=1)

    m1w = B.eval(m1.layers[1].W)
    m1b = B.eval(m1.layers[1].b)
    m1W_norm = np.sqrt(np.sum(np.square(m1w), axis=1))
    m1b_norm = np.sqrt(np.sum(np.square(m1b), axis=0))
    constraint_w = m1w * maxnorm / m1W_norm
    constraint_b = m1b * maxnorm / m1b_norm

    assert_allclose(B.eval(m2.layers[1].W), constraint_w)
    assert_allclose(B.eval(m2.layers[1].b), constraint_b)
    assert_allclose(B.eval(m3.layers[1].W), constraint_w)
    assert_allclose(B.eval(m3.layers[1].b), m1b)
    assert_allclose(B.eval(m4.layers[1].W), constraint_w)
    assert_allclose(B.eval(m4.layers[1].b), constraint_b)
    assert_allclose(B.eval(m5.layers[1].W), constraint_w)
    assert_allclose(B.eval(m5.layers[1].b), m1b)
    assert_allclose(B.eval(m6.layers[1].embedded_layers[0].W), constraint_w)
    assert_allclose(B.eval(m6.layers[1].embedded_layers[0].b), constraint_b)
    assert_allclose(
        B.eval(m7.layers[1].embedded_layers[0].embedded_layers[0].W),
        constraint_w)
    assert_allclose(
        B.eval(m7.layers[1].embedded_layers[0].embedded_layers[0].b),
        constraint_b)
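# The expected values above rescale unconditionally, which assumes the
# post-fit parameter norms exceed maxnorm. A generic max-norm projection
# only rescales when the norm exceeds the cap (a sketch, not keraflow's
# implementation; the epsilon guarding division by zero is an assumption):
import numpy as np

def max_norm_project(w, m, axis):
    # L2 norm along the constrained axis, kept broadcastable against w
    norms = np.sqrt(np.sum(np.square(w), axis=axis, keepdims=True))
    # rows/columns with norm <= m are unchanged; larger ones shrink to norm m
    return w * np.clip(norms, 0, m) / (norms + 1e-7)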
def create_model(**kwargs):
    model = Sequential([Input(1), Dense(2, **kwargs)])
    model.compile('sgd', 'mse')
    return model
# cut texts after this number of words (among top max_features most common words)
maxlen = 80
batch_size = 32

print('Loading data...')
(X_train, y_train), (X_test, y_test) = imdb.load_data(nb_words=max_features)
print(len(X_train), 'train sequences')
print(len(X_test), 'test sequences')

print('Pad sequences (samples x time)')
X_train = pad_sequences(X_train, maxlen=maxlen)
X_test = pad_sequences(X_test, maxlen=maxlen)
print('X_train shape:', X_train.shape)
print('X_test shape:', X_test.shape)

print('Build model...')
model = Sequential()
model.add(Input(None, dtype='int32'))
model.add(Embedding(max_features, 128))
model.add(LSTM(128, dropout_W=0.2, dropout_U=0.2))  # try using a GRU instead, for fun
model.add(Dense(1))
model.add(Activation('sigmoid'))

# try using different optimizers and different optimizer configs
model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

print('Train...')
model.fit(X_train, y_train,
          batch_size=batch_size,
          nb_epoch=15,
          validation_data=(X_test, y_test))
score, acc = model.evaluate(X_test, y_test, batch_size=batch_size)