def test_merge_overlap():
    left = Sequential()
    left.add(Dense(nb_hidden, input_shape=(input_dim,)))
    left.add(Activation('relu'))

    model = Sequential()
    model.add(Merge([left, left], mode='sum'))
    model.add(Dense(nb_class))
    model.add(Activation('softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='rmsprop')

    model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=nb_epoch,
              show_accuracy=True, verbose=1, validation_data=(X_test, y_test))
    model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=nb_epoch,
              show_accuracy=False, verbose=2, validation_data=(X_test, y_test))
    model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=nb_epoch,
              show_accuracy=True, verbose=2, validation_split=0.1)
    model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=nb_epoch,
              show_accuracy=False, verbose=1, validation_split=0.1)
    model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=nb_epoch, verbose=0)
    model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=nb_epoch,
              verbose=1, shuffle=False)

    model.train_on_batch(X_train[:32], y_train[:32])

    loss = model.evaluate(X_train, y_train, verbose=0)
    assert loss < 0.7
    model.predict(X_test, verbose=0)
    model.predict_classes(X_test, verbose=0)
    model.predict_proba(X_test, verbose=0)
    model.get_config(verbose=0)

    fname = 'test_merge_overlap_temp.h5'
    model.save_weights(fname, overwrite=True)
    model.load_weights(fname)
    os.remove(fname)

    nloss = model.evaluate(X_train, y_train, verbose=0)
    assert loss == nloss
def test_sequential_model_saving():
    model = Sequential()
    model.add(Dense(2, input_dim=3))
    model.add(Dense(3))
    model.compile(loss='mse', optimizer='rmsprop', metrics=['acc'])

    x = np.random.random((1, 3))
    y = np.random.random((1, 3))
    model.train_on_batch(x, y)

    out = model.predict(x)
    fname = 'tmp_' + str(np.random.randint(10000)) + '.h5'
    save_model(model, fname)

    new_model = load_model(fname)
    out2 = new_model.predict(x)
    assert_allclose(out, out2, atol=1e-05)

    # test that new updates are the same with both models
    x = np.random.random((1, 3))
    y = np.random.random((1, 3))
    model.train_on_batch(x, y)
    new_model.train_on_batch(x, y)
    out = model.predict(x)
    out2 = new_model.predict(x)
    assert_allclose(out, out2, atol=1e-05)

    # test load_weights on model file
    model.load_weights(fname)
    os.remove(fname)
def test_nested_sequential(in_tmpdir):
    (x_train, y_train), (x_test, y_test) = _get_test_data()

    inner = Sequential()
    inner.add(Dense(num_hidden, input_shape=(input_dim,)))
    inner.add(Activation('relu'))
    inner.add(Dense(num_class))

    middle = Sequential()
    middle.add(inner)

    model = Sequential()
    model.add(middle)
    model.add(Activation('softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='rmsprop')

    model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs,
              verbose=1, validation_data=(x_test, y_test))
    model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs,
              verbose=2, validation_split=0.1)
    model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, verbose=0)
    model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs,
              verbose=1, shuffle=False)

    model.train_on_batch(x_train[:32], y_train[:32])

    loss = model.evaluate(x_test, y_test, verbose=0)

    model.predict(x_test, verbose=0)
    model.predict_classes(x_test, verbose=0)
    model.predict_proba(x_test, verbose=0)

    fname = 'test_nested_sequential_temp.h5'
    model.save_weights(fname, overwrite=True)

    inner = Sequential()
    inner.add(Dense(num_hidden, input_shape=(input_dim,)))
    inner.add(Activation('relu'))
    inner.add(Dense(num_class))

    middle = Sequential()
    middle.add(inner)

    model = Sequential()
    model.add(middle)
    model.add(Activation('softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
    model.load_weights(fname)
    os.remove(fname)

    nloss = model.evaluate(x_test, y_test, verbose=0)
    assert loss == nloss

    # test serialization
    config = model.get_config()
    Sequential.from_config(config)

    model.summary()
    json_str = model.to_json()
    model_from_json(json_str)

    yaml_str = model.to_yaml()
    model_from_yaml(yaml_str)
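# NOTE: the test snippets in this collection assume module-level fixtures that
# are not shown. A minimal sketch of what they might look like -- the constant
# values here are assumptions chosen only so the snippets run, and
# `get_test_data` is Keras' own test helper (older snippets use the nb_*
# spellings of the same constants):
import numpy as np
from keras.utils.test_utils import get_test_data
from keras.utils.np_utils import to_categorical

batch_size = 32
epochs = nb_epoch = 1
num_hidden = nb_hidden = 16
num_classes = num_class = nb_class = 4
input_dim = 10


def _get_test_data():
    (x_train, y_train), (x_test, y_test) = get_test_data(num_train=500,
                                                         num_test=200,
                                                         input_shape=(input_dim,),
                                                         classification=True,
                                                         num_classes=num_classes)
    y_train = to_categorical(y_train)
    y_test = to_categorical(y_test)
    return (x_train, y_train), (x_test, y_test)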
def test_sequential_model_saving():
    model = Sequential()
    model.add(Dense(2, input_shape=(3,)))
    model.add(RepeatVector(3))
    model.add(TimeDistributed(Dense(3)))
    model.compile(loss=losses.MSE,
                  optimizer=optimizers.RMSprop(lr=0.0001),
                  metrics=[metrics.categorical_accuracy],
                  sample_weight_mode='temporal')
    x = np.random.random((1, 3))
    y = np.random.random((1, 3, 3))
    model.train_on_batch(x, y)

    out = model.predict(x)
    _, fname = tempfile.mkstemp('.h5')
    save_model(model, fname)

    new_model = load_model(fname)
    os.remove(fname)

    out2 = new_model.predict(x)
    assert_allclose(out, out2, atol=1e-05)

    # test that new updates are the same with both models
    x = np.random.random((1, 3))
    y = np.random.random((1, 3, 3))
    model.train_on_batch(x, y)
    new_model.train_on_batch(x, y)
    out = model.predict(x)
    out2 = new_model.predict(x)
    assert_allclose(out, out2, atol=1e-05)
def test_sequential_model_saving_2():
    # test with custom optimizer, loss
    custom_opt = optimizers.rmsprop
    custom_loss = losses.mse

    model = Sequential()
    model.add(Dense(2, input_shape=(3,)))
    model.add(Dense(3))
    model.compile(loss=custom_loss, optimizer=custom_opt(), metrics=['acc'])

    x = np.random.random((1, 3))
    y = np.random.random((1, 3))
    model.train_on_batch(x, y)

    out = model.predict(x)

    load_kwargs = {'custom_objects': {'custom_opt': custom_opt,
                                      'custom_loss': custom_loss}}
    _, fname = tempfile.mkstemp('.h5')
    save_model(model, fname)
    new_model_disk = load_model(fname, **load_kwargs)
    os.remove(fname)

    with tf_file_io_proxy('keras.engine.saving.tf_file_io') as file_io_proxy:
        gcs_filepath = file_io_proxy.get_filepath(filename=fname)
        save_model(model, gcs_filepath)
        file_io_proxy.assert_exists(gcs_filepath)
        new_model_gcs = load_model(gcs_filepath, **load_kwargs)
        file_io_proxy.delete_file(gcs_filepath)  # cleanup

    for new_model in [new_model_disk, new_model_gcs]:
        new_out = new_model.predict(x)
        assert_allclose(out, new_out, atol=1e-05)
def test_sequential_model_saving_2():
    # test with funkier config
    model = Sequential()
    model.add(Dense(2, input_dim=3))
    model.add(RepeatVector(3))
    model.add(TimeDistributed(Dense(3)))
    model.compile(loss=objectives.MSE,
                  optimizer=optimizers.RMSprop(lr=0.0001),
                  metrics=[metrics.categorical_accuracy],
                  sample_weight_mode='temporal')
    x = np.random.random((1, 3))
    y = np.random.random((1, 3, 3))
    model.train_on_batch(x, y)

    out = model.predict(x)
    fname = 'tmp_' + str(np.random.randint(10000)) + '.h5'
    save_model(model, fname)

    new_model = load_model(fname)
    os.remove(fname)

    out2 = new_model.predict(x)
    assert_allclose(out, out2, atol=1e-05)

    # test that new updates are the same with both models
    x = np.random.random((1, 3))
    y = np.random.random((1, 3, 3))
    model.train_on_batch(x, y)
    new_model.train_on_batch(x, y)
    out = model.predict(x)
    out2 = new_model.predict(x)
    assert_allclose(out, out2, atol=1e-05)
def test_nested_sequential():
    (X_train, y_train), (X_test, y_test) = _get_test_data()

    inner = Sequential()
    inner.add(Dense(nb_hidden, input_shape=(input_dim,)))
    inner.add(Activation("relu"))
    inner.add(Dense(nb_class))

    middle = Sequential()
    middle.add(inner)

    model = Sequential()
    model.add(middle)
    model.add(Activation("softmax"))
    model.compile(loss="categorical_crossentropy", optimizer="rmsprop")

    model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=nb_epoch,
              verbose=1, validation_data=(X_test, y_test))
    model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=nb_epoch,
              verbose=2, validation_split=0.1)
    model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=nb_epoch, verbose=0)
    model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=nb_epoch,
              verbose=1, shuffle=False)

    model.train_on_batch(X_train[:32], y_train[:32])

    loss = model.evaluate(X_test, y_test, verbose=0)

    model.predict(X_test, verbose=0)
    model.predict_classes(X_test, verbose=0)
    model.predict_proba(X_test, verbose=0)

    fname = "test_nested_sequential_temp.h5"
    model.save_weights(fname, overwrite=True)

    inner = Sequential()
    inner.add(Dense(nb_hidden, input_shape=(input_dim,)))
    inner.add(Activation("relu"))
    inner.add(Dense(nb_class))

    middle = Sequential()
    middle.add(inner)

    model = Sequential()
    model.add(middle)
    model.add(Activation("softmax"))
    model.compile(loss="categorical_crossentropy", optimizer="rmsprop")
    model.load_weights(fname)
    os.remove(fname)

    nloss = model.evaluate(X_test, y_test, verbose=0)
    assert loss == nloss

    # test serialization
    config = model.get_config()
    new_model = Sequential.from_config(config)

    model.summary()
    json_str = model.to_json()
    new_model = model_from_json(json_str)

    yaml_str = model.to_yaml()
    new_model = model_from_yaml(yaml_str)
def test_dynamic_behavior(layer_class):
    layer = layer_class(units, input_shape=(None, embedding_dim))
    model = Sequential()
    model.add(layer)
    model.compile('sgd', 'mse')
    x = np.random.random((num_samples, timesteps, embedding_dim))
    y = np.random.random((num_samples, units))
    model.train_on_batch(x, y)
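# Similarly, the recurrent-layer tests (test_dynamic_behavior, _runner,
# test_statefulness) read their sizes from module-level constants. A minimal
# sketch with assumed values -- any small positive integers work, and in the
# original sources these live in different test modules than the fixtures
# sketched above:
num_samples = nb_samples = 2
timesteps = 5
embedding_dim = 4
embedding_num = 12  # vocabulary size for the Embedding in test_statefulness
units = output_dim = 3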
def test_with_list_as_targets():
    model = Sequential()
    model.add(Dense(1, input_dim=3, trainable=False))
    model.compile('rmsprop', 'mse')

    x = np.random.random((2, 3))
    y = [0, 1]
    model.train_on_batch(x, y)
def test_dynamic_behavior(layer_class):
    layer = layer_class(output_dim, input_dim=embedding_dim)
    model = Sequential()
    model.add(layer)
    model.compile('sgd', 'mse')
    x = np.random.random((nb_samples, timesteps, embedding_dim))
    y = np.random.random((nb_samples, output_dim))
    model.train_on_batch(x, y)
def test_hierarchical_softmax(timesteps=15, input_dim=50, batch_size=32,
                              output_dim=3218, batches=300, epochs=30):
    model = Graph()
    model.add_input(name='real_input',
                    batch_input_shape=(batch_size, timesteps, input_dim))
    model.add_input(name='train_input',
                    batch_input_shape=(batch_size, timesteps), dtype='int32')
    model.add_node(HierarchicalSoftmax(output_dim, input_dim=input_dim,
                                       input_length=timesteps),
                   name='hs', inputs=['real_input', 'train_input'],
                   merge_mode='join', create_output=True)
    model.compile(loss={'hs': hs_categorical_crossentropy}, optimizer='adam')
    print("hs model compiled")

    model2 = Sequential()
    model2.add(TimeDistributedDense(output_dim,
                                    batch_input_shape=(batch_size, timesteps, input_dim)))
    model2.add(Activation('softmax'))
    model2.compile(loss='categorical_crossentropy', optimizer='adam')
    print("softmax model compiled")

    learn_f = np.random.normal(size=(input_dim, output_dim))
    learn_f = np.divide(learn_f, norm(learn_f, axis=1)[:, None])
    print("learn_f generated")

    for j in range(epochs):
        batch_data = generate_batch(learn_f, batch_size, timesteps,
                                    input_dim, output_dim, batches)
        print("Epoch", j, "data generated")

        p = Progbar(batches * batch_size)
        for b in batch_data:
            data_train = {'real_input': b[0], 'train_input': b[1], 'hs': b[2]}
            loss = float(model.train_on_batch(data_train)[0])
            p.add(batch_size, [('hs_loss', loss)])

        p2 = Progbar(batches * batch_size)
        for b in batch_data:
            loss, acc = model2.train_on_batch(b[0], b[3], accuracy=True)
            p2.add(batch_size, [('softmax_loss', loss), ('softmax_acc', acc)])

    test_data = generate_batch(learn_f, batch_size, timesteps,
                               input_dim, output_dim, batches)

    p = Progbar(batches * batch_size)
    for b in test_data:
        data_test = {'real_input': b[0], 'train_input': b[1], 'hs': b[3]}
        loss = float(model.test_on_batch(data_test)[0])
        p.add(batch_size, [('hs__test_loss', loss)])

    # evaluate (rather than train) the softmax baseline on the held-out batches
    p2 = Progbar(batches * batch_size)
    for b in test_data:
        loss = float(model2.test_on_batch(b[0], b[3])[0])
        p2.add(batch_size, [('softmax_test_loss', loss)])
def test_loading_weights_by_name_2():
    """
    test loading model weights by name on:
        - both sequential and functional api models
        - different architecture with shared names
    """
    # test with custom optimizer, loss
    custom_opt = optimizers.rmsprop
    custom_loss = losses.mse

    # sequential model
    model = Sequential()
    model.add(Dense(2, input_shape=(3,), name='rick'))
    model.add(Dense(3, name='morty'))
    model.compile(loss=custom_loss, optimizer=custom_opt(), metrics=['acc'])

    x = np.random.random((1, 3))
    y = np.random.random((1, 3))
    model.train_on_batch(x, y)

    out = model.predict(x)
    old_weights = [layer.get_weights() for layer in model.layers]
    _, fname = tempfile.mkstemp('.h5')

    model.save_weights(fname)

    # delete and recreate model using Functional API
    del model
    data = Input(shape=(3,))
    rick = Dense(2, name='rick')(data)
    jerry = Dense(3, name='jerry')(rick)  # add 2 layers (but maintain shapes)
    jessica = Dense(2, name='jessica')(jerry)
    morty = Dense(3, name='morty')(jessica)

    model = Model(inputs=[data], outputs=[morty])
    model.compile(loss=custom_loss, optimizer=custom_opt(), metrics=['acc'])

    # load weights from first model
    model.load_weights(fname, by_name=True)
    os.remove(fname)

    out2 = model.predict(x)
    assert np.max(np.abs(out - out2)) > 1e-05

    rick = model.layers[1].get_weights()
    jerry = model.layers[2].get_weights()
    jessica = model.layers[3].get_weights()
    morty = model.layers[4].get_weights()

    assert_allclose(old_weights[0][0], rick[0], atol=1e-05)
    assert_allclose(old_weights[0][1], rick[1], atol=1e-05)
    assert_allclose(old_weights[1][0], morty[0], atol=1e-05)
    assert_allclose(old_weights[1][1], morty[1], atol=1e-05)
    assert_allclose(np.zeros_like(jerry[1]), jerry[1])      # biases init to 0
    assert_allclose(np.zeros_like(jessica[1]), jessica[1])  # biases init to 0
def test_unitnorm_constraint(self):
    lookup = Sequential()
    lookup.add(Embedding(3, 2, weights=[self.W1],
                         W_constraint=unitnorm(),
                         input_length=1))
    lookup.add(Flatten())
    lookup.add(Dense(1))
    lookup.add(Activation('sigmoid'))
    lookup.compile(loss='binary_crossentropy', optimizer='sgd',
                   class_mode='binary')
    lookup.train_on_batch(self.X1, np.array([[1], [0]], dtype='int32'))
    norm = np.linalg.norm(lookup.params[0].get_value(), axis=1)
    self.assertTrue(np.allclose(norm, np.ones_like(norm).astype('float32')))
def _runner(layer_class):
    """
    All the recurrent layers share the same interface,
    so we can run through them with a single function.
    """
    for ret_seq in [True, False]:
        layer = layer_class(output_dim, return_sequences=ret_seq,
                            weights=None, input_shape=(timesteps, input_dim))
        layer.input = K.variable(np.ones((nb_samples, timesteps, input_dim)))
        layer.get_config()

        for train in [True, False]:
            out = K.eval(layer.get_output(train))
            # Make sure the output has the desired shape
            if ret_seq:
                assert out.shape == (nb_samples, timesteps, output_dim)
            else:
                assert out.shape == (nb_samples, output_dim)
            mask = layer.get_output_mask(train)

    # check statefulness
    layer = layer_class(output_dim, return_sequences=False, stateful=True,
                        weights=None,
                        batch_input_shape=(nb_samples, timesteps, input_dim))
    model = Sequential()
    model.add(layer)
    model.compile(optimizer='sgd', loss='mse')
    out1 = model.predict(np.ones((nb_samples, timesteps, input_dim)))
    assert out1.shape == (nb_samples, output_dim)

    # train once so that the states change
    model.train_on_batch(np.ones((nb_samples, timesteps, input_dim)),
                         np.ones((nb_samples, output_dim)))
    out2 = model.predict(np.ones((nb_samples, timesteps, input_dim)))
    # if the state is not reset, output should be different
    assert out1.max() != out2.max()

    # check that output changes after states are reset
    # (even though the model itself didn't change)
    layer.reset_states()
    out3 = model.predict(np.ones((nb_samples, timesteps, input_dim)))
    assert out2.max() != out3.max()

    # check that container-level reset_states() works
    model.reset_states()
    out4 = model.predict(np.ones((nb_samples, timesteps, input_dim)))
    assert_allclose(out3, out4, atol=1e-5)

    # check that the call to `predict` updated the states
    out5 = model.predict(np.ones((nb_samples, timesteps, input_dim)))
    assert out4.max() != out5.max()
def test_linear_in_bounds_regularizer():
    model = Sequential()
    model.add(LinearInBounds(-1, 1, clip=True, input_shape=(1,)))
    model.compile('adam', 'mse')
    loss = model.train_on_batch(np.array([[0]]), np.array([[0]]))
    assert float(loss) == 0

    loss_on_2 = model.train_on_batch(np.array([[2]]), np.array([[1]]))
    assert float(loss_on_2) > 0

    loss_on_100 = model.train_on_batch(np.array([[100]]), np.array([[1]]))
    assert float(loss_on_2) < float(loss_on_100)
def test_statefulness(layer_class):
    model = Sequential()
    model.add(embeddings.Embedding(embedding_num, embedding_dim,
                                   mask_zero=True,
                                   input_length=timesteps,
                                   batch_input_shape=(nb_samples, timesteps)))
    layer = layer_class(output_dim, return_sequences=False,
                        stateful=True, weights=None)
    model.add(layer)
    model.compile(optimizer='sgd', loss='mse')
    out1 = model.predict(np.ones((nb_samples, timesteps)))
    assert out1.shape == (nb_samples, output_dim)

    # train once so that the states change
    model.train_on_batch(np.ones((nb_samples, timesteps)),
                         np.ones((nb_samples, output_dim)))
    out2 = model.predict(np.ones((nb_samples, timesteps)))
    # if the state is not reset, output should be different
    assert out1.max() != out2.max()

    # check that output changes after states are reset
    # (even though the model itself didn't change)
    layer.reset_states()
    out3 = model.predict(np.ones((nb_samples, timesteps)))
    assert out2.max() != out3.max()

    # check that container-level reset_states() works
    model.reset_states()
    out4 = model.predict(np.ones((nb_samples, timesteps)))
    assert_allclose(out3, out4, atol=1e-5)

    # check that the call to `predict` updated the states
    out5 = model.predict(np.ones((nb_samples, timesteps)))
    assert out4.max() != out5.max()

    # Check masking
    layer.reset_states()

    left_padded_input = np.ones((nb_samples, timesteps))
    left_padded_input[0, :1] = 0
    left_padded_input[1, :2] = 0
    out6 = model.predict(left_padded_input)

    layer.reset_states()

    right_padded_input = np.ones((nb_samples, timesteps))
    right_padded_input[0, -1:] = 0
    right_padded_input[1, -2:] = 0
    out7 = model.predict(right_padded_input)

    assert_allclose(out7, out6, atol=1e-5)
def test_naming():
    layer = core.Dense(2, input_dim=2)
    assert layer.name == 'dense'

    model = Sequential()
    model.add(core.Dense(2, input_dim=2, name='my_dense'))
    model.add(core.Dense(2, name='my_dense'))
    assert model.layers[0].name == 'my_dense'
    assert model.layers[1].name == 'my_dense'

    model.compile(optimizer='rmsprop', loss='mse')
    model.train_on_batch(np.random.random((2, 2)), np.random.random((2, 2)))
def test_TimeDistributed():
    # first, test with Dense layer
    model = Sequential()
    model.add(wrappers.TimeDistributed(core.Dense(2), input_shape=(3, 4)))
    model.add(core.Activation('relu'))
    model.compile(optimizer='rmsprop', loss='mse')
    model.fit(np.random.random((10, 3, 4)), np.random.random((10, 3, 2)),
              nb_epoch=1, batch_size=10)

    # test config
    model.get_config()

    # compare to TimeDistributedDense
    test_input = np.random.random((1, 3, 4))
    test_output = model.predict(test_input)
    weights = model.layers[0].get_weights()

    reference = Sequential()
    reference.add(core.TimeDistributedDense(2, input_shape=(3, 4),
                                            weights=weights))
    reference.add(core.Activation('relu'))
    reference.compile(optimizer='rmsprop', loss='mse')

    reference_output = reference.predict(test_input)
    assert_allclose(test_output, reference_output, atol=1e-05)

    # test when specifying a batch_input_shape
    reference = Sequential()
    reference.add(core.TimeDistributedDense(2, batch_input_shape=(1, 3, 4),
                                            weights=weights))
    reference.add(core.Activation('relu'))
    reference.compile(optimizer='rmsprop', loss='mse')

    reference_output = reference.predict(test_input)
    assert_allclose(test_output, reference_output, atol=1e-05)

    # test with Convolution2D
    model = Sequential()
    model.add(wrappers.TimeDistributed(convolutional.Convolution2D(5, 2, 2,
                                                                   border_mode='same'),
                                       input_shape=(2, 3, 4, 4)))
    model.add(core.Activation('relu'))
    model.compile(optimizer='rmsprop', loss='mse')
    model.train_on_batch(np.random.random((1, 2, 3, 4, 4)),
                         np.random.random((1, 2, 5, 4, 4)))

    model = model_from_json(model.to_json())
    model.summary()

    # test stacked layers
    model = Sequential()
    model.add(wrappers.TimeDistributed(core.Dense(2), input_shape=(3, 4)))
    model.add(wrappers.TimeDistributed(core.Dense(3)))
    model.add(core.Activation('relu'))
    model.compile(optimizer='rmsprop', loss='mse')
    model.fit(np.random.random((10, 3, 4)), np.random.random((10, 3, 3)),
              nb_epoch=1, batch_size=10)
def test_loading_weights_by_name_and_reshape():
    """
    test loading model weights by name on:
        - sequential model
    """
    # test with custom optimizer, loss
    custom_opt = optimizers.rmsprop
    custom_loss = losses.mse

    # sequential model
    model = Sequential()
    model.add(Conv2D(2, (1, 1), input_shape=(1, 1, 1), name='rick'))
    model.add(Flatten())
    model.add(Dense(3, name='morty'))
    model.compile(loss=custom_loss, optimizer=custom_opt(), metrics=['acc'])

    x = np.random.random((1, 1, 1, 1))
    y = np.random.random((1, 3))
    model.train_on_batch(x, y)

    out = model.predict(x)
    old_weights = [layer.get_weights() for layer in model.layers]
    _, fname = tempfile.mkstemp('.h5')

    model.save_weights(fname)

    # delete and recreate model
    del model
    model = Sequential()
    model.add(Conv2D(2, (1, 1), input_shape=(1, 1, 1), name='rick'))
    model.add(Conv2D(3, (1, 1), name='morty'))
    model.compile(loss=custom_loss, optimizer=custom_opt(), metrics=['acc'])

    # load weights from first model
    with pytest.raises(ValueError):
        model.load_weights(fname, by_name=True, reshape=False)
    with pytest.raises(ValueError):
        model.load_weights(fname, by_name=False, reshape=False)
    model.load_weights(fname, by_name=False, reshape=True)
    model.load_weights(fname, by_name=True, reshape=True)
    os.remove(fname)

    out2 = model.predict(x)
    assert_allclose(np.squeeze(out), np.squeeze(out2), atol=1e-05)
    for i in range(len(model.layers)):
        new_weights = model.layers[i].get_weights()
        for j in range(len(new_weights)):
            # only compare layers that have weights, skipping Flatten()
            if old_weights[i]:
                assert_allclose(old_weights[i][j], new_weights[j], atol=1e-05)
class CartPoleController(object):
    def __init__(self, n_input=4, n_hidden=10, n_output=1,
                 initial_state=0.1, training_threshold=1.5):
        self.n_input = n_input
        self.n_hidden = n_hidden
        self.n_output = n_output
        self.initial_state = initial_state
        self.training_threshold = training_threshold
        self.step_threshold = 0.5

        # Action neural network
        #   Dense input -> (1 x n_input)
        #   LSTM -> (n_hidden)
        #   Dense output -> (n_output)
        self.action_model = Sequential()
        self.action_model.add(LSTM(self.n_hidden, input_shape=(1, self.n_input)))
        self.action_model.add(Activation('tanh'))
        self.action_model.add(Dense(self.n_output))
        self.action_model.add(Activation('sigmoid'))
        self.action_model.compile(loss='mse', optimizer='adam')

    def action(self, obs, prev_obs=None, prev_action=None):
        x = np.ndarray(shape=(1, 1, self.n_input)).astype(K.floatx())

        if prev_obs is not None:
            prev_norm = np.linalg.norm(prev_obs)
            if prev_norm > self.training_threshold:
                # Compute a training step
                x[0, 0, :] = prev_obs
                if prev_norm < self.step_threshold:
                    y = np.array([prev_action]).astype(K.floatx())
                else:
                    y = np.array([np.abs(prev_action - 1)]).astype(K.floatx())
                self.action_model.train_on_batch(x, y)

        # Predict new value
        x[0, 0, :] = obs
        output = self.action_model.predict(x, batch_size=1)
        return self.step(output)

    def step(self, value):
        if value > self.step_threshold:
            return int(1)
        else:
            return int(0)
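# A hedged sketch of how CartPoleController might be driven against OpenAI
# Gym's CartPole-v0. The loop below is illustrative, not part of the original
# class, and assumes the classic `gym` API where env.step returns
# (obs, reward, done, info):
import gym

env = gym.make('CartPole-v0')
controller = CartPoleController()
obs = env.reset()
prev_obs, prev_action = None, None
done = False
while not done:
    action = controller.action(obs, prev_obs, prev_action)
    prev_obs, prev_action = obs, action
    obs, reward, done, info = env.step(action)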
def test_sequential(self):
    print('Test sequential')
    model = Sequential()
    model.add(Dense(nb_hidden, input_shape=(input_dim,)))
    model.add(Activation('relu'))
    model.add(Dense(nb_class))
    model.add(Activation('softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='rmsprop')

    model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=nb_epoch,
              show_accuracy=True, verbose=1, validation_data=(X_test, y_test))
    model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=nb_epoch,
              show_accuracy=False, verbose=2, validation_data=(X_test, y_test))
    model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=nb_epoch,
              show_accuracy=True, verbose=2, validation_split=0.1)
    model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=nb_epoch,
              show_accuracy=False, verbose=1, validation_split=0.1)
    model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=nb_epoch, verbose=0)
    model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=nb_epoch,
              verbose=1, shuffle=False)

    model.train_on_batch(X_train[:32], y_train[:32])

    loss = model.evaluate(X_train, y_train, verbose=0)
    print('loss:', loss)
    if loss > 0.7:
        raise Exception('Score too low, learning issue.')
    model.predict(X_test, verbose=0)
    model.predict_classes(X_test, verbose=0)
    model.predict_proba(X_test, verbose=0)
    model.get_config(verbose=0)

    print('test weight saving')
    fname = 'test_sequential_temp.h5'
    model.save_weights(fname, overwrite=True)
    model = Sequential()
    model.add(Dense(nb_hidden, input_shape=(input_dim,)))
    model.add(Activation('relu'))
    model.add(Dense(nb_class))
    model.add(Activation('softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
    model.load_weights(fname)
    os.remove(fname)

    nloss = model.evaluate(X_train, y_train, verbose=0)
    assert loss == nloss

    # test json serialization
    json_data = model.to_json()
    model = model_from_json(json_data)

    # test yaml serialization
    yaml_data = model.to_yaml()
    model = model_from_yaml(yaml_data)
def test_in_bounds_regularizer():
    model = Sequential()
    model.add(InBounds(-1, 1, clip=True, input_shape=(1,)))
    model.compile('adam', 'mse')
    assert model.metrics_names == ['loss', 'reg']

    loss, reg = model.train_on_batch(np.array([[0]]), np.array([[0]]))
    assert float(loss) == 0

    loss_on_2, reg = model.train_on_batch(np.array([[2]]), np.array([[1]]))
    assert float(loss_on_2) > 0

    loss_on_100, reg = model.train_on_batch(np.array([[100]]), np.array([[1]]))
    assert float(loss_on_2) < float(loss_on_100)
def test_learning_rate_multipliers_conv():
    '''
    Test learning rate multipliers on Convolutional layers
    '''
    np.random.seed(seed)
    X_train = np.random.rand(10, 3, 10, 10)
    y_train = np.random.rand(10, 1, 6, 6)

    np.random.seed(seed)
    model0 = Sequential()
    model0.add(keras.layers.Convolution2D(5, 3, 3,
                                          input_shape=(3, 10, 10),
                                          border_mode='valid',
                                          activation='relu'))
    model0.add(keras.layers.Convolution2D(1, 3, 3, border_mode='valid'))
    model0.compile(loss='mse', optimizer='sgd')
    (m0w0_ini, m0b0_ini) = model0.layers[0].get_weights()
    (m0w1_ini, m0b1_ini) = model0.layers[1].get_weights()
    model0.train_on_batch(X_train, y_train)
    (m0w0_end, m0b0_end) = model0.layers[0].get_weights()
    (m0w1_end, m0b1_end) = model0.layers[1].get_weights()

    np.random.seed(seed)
    model1 = Sequential()
    model1.add(keras.layers.Convolution2D(5, 3, 3,
                                          input_shape=(3, 10, 10),
                                          border_mode='valid',
                                          W_learning_rate_multiplier=0.0,
                                          b_learning_rate_multiplier=0.0,
                                          activation='relu'))
    model1.add(keras.layers.Convolution2D(1, 3, 3,
                                          W_learning_rate_multiplier=0.5,
                                          b_learning_rate_multiplier=0.5,
                                          border_mode='valid'))
    model1.compile(loss='mse', optimizer='sgd')
    (m1w0_ini, m1b0_ini) = model1.layers[0].get_weights()
    (m1w1_ini, m1b1_ini) = model1.layers[1].get_weights()
    model1.train_on_batch(X_train, y_train)
    (m1w0_end, m1b0_end) = model1.layers[0].get_weights()
    (m1w1_end, m1b1_end) = model1.layers[1].get_weights()

    # This should be ~0.0
    np.testing.assert_almost_equal(np.mean(m1w0_end - m1w0_ini), 0.0, decimal=2)
    np.testing.assert_almost_equal(np.mean(m1b0_end - m1b0_ini), 0.0, decimal=2)
    # This should be ~0.5
    np.testing.assert_almost_equal(np.mean((m1w1_end - m1w1_ini) / (m0w1_end - m0w1_ini)),
                                   0.5, decimal=2)
    np.testing.assert_almost_equal(np.mean((m1b1_end - m1b1_ini) / (m0b1_end - m0b1_ini)),
                                   0.5, decimal=2)
def test_merge_overlap():
    (X_train, y_train), (X_test, y_test) = _get_test_data()

    left = Sequential()
    left.add(Dense(nb_hidden, input_shape=(input_dim,)))
    left.add(Activation('relu'))

    model = Sequential()
    model.add(Merge([left, left], mode='sum'))
    model.add(Dense(nb_class))
    model.add(Activation('softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='rmsprop')

    model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=nb_epoch,
              verbose=1, validation_data=(X_test, y_test))
    model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=nb_epoch,
              verbose=2, validation_split=0.1)
    model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=nb_epoch, verbose=0)
    model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=nb_epoch,
              verbose=1, shuffle=False)

    model.train_on_batch(X_train[:32], y_train[:32])

    loss = model.evaluate(X_test, y_test, verbose=0)
    model.predict(X_test, verbose=0)
    model.predict_classes(X_test, verbose=0)
    model.predict_proba(X_test, verbose=0)

    fname = 'test_merge_overlap_temp.h5'
    print(model.layers)
    model.save_weights(fname, overwrite=True)
    print(model.trainable_weights)

    model.load_weights(fname)
    os.remove(fname)

    nloss = model.evaluate(X_test, y_test, verbose=0)
    assert loss == nloss

    # test serialization
    config = model.get_config()
    Sequential.from_config(config)

    model.summary()
    json_str = model.to_json()
    model_from_json(json_str)

    yaml_str = model.to_yaml()
    model_from_yaml(yaml_str)
def test_loading_weights_by_name_skip_mismatch():
    """
    test skipping layers while loading model weights by name on:
        - sequential model
    """
    # test with custom optimizer, loss
    custom_opt = optimizers.rmsprop
    custom_loss = losses.mse

    # sequential model
    model = Sequential()
    model.add(Dense(2, input_shape=(3,), name='rick'))
    model.add(Dense(3, name='morty'))
    model.compile(loss=custom_loss, optimizer=custom_opt(), metrics=['acc'])

    x = np.random.random((1, 3))
    y = np.random.random((1, 3))
    model.train_on_batch(x, y)

    out = model.predict(x)
    old_weights = [layer.get_weights() for layer in model.layers]
    _, fname = tempfile.mkstemp('.h5')

    model.save_weights(fname)

    # delete and recreate model
    del model
    model = Sequential()
    model.add(Dense(2, input_shape=(3,), name='rick'))
    model.add(Dense(4, name='morty'))  # different shape w.r.t. previous model
    model.compile(loss=custom_loss, optimizer=custom_opt(), metrics=['acc'])

    # load weights from first model
    with pytest.warns(UserWarning):  # expect UserWarning for skipping weights
        model.load_weights(fname, by_name=True, skip_mismatch=True)
    os.remove(fname)

    # assert layers 'rick' are equal
    for old, new in zip(old_weights[0], model.layers[0].get_weights()):
        assert_allclose(old, new, atol=1e-05)

    # assert layers 'morty' are not equal, since we skipped loading this layer
    for old, new in zip(old_weights[1], model.layers[1].get_weights()):
        assert_raises(AssertionError, assert_allclose, old, new, atol=1e-05)
def test_loading_weights_by_name():
    """
    test loading model weights by name on:
        - sequential model
    """
    # test with custom optimizer, loss
    custom_opt = optimizers.rmsprop
    custom_loss = losses.mse

    # sequential model
    model = Sequential()
    model.add(Dense(2, input_shape=(3,), name='rick'))
    model.add(Dense(3, name='morty'))
    model.compile(loss=custom_loss, optimizer=custom_opt(), metrics=['acc'])

    x = np.random.random((1, 3))
    y = np.random.random((1, 3))
    model.train_on_batch(x, y)

    out = model.predict(x)
    old_weights = [layer.get_weights() for layer in model.layers]
    _, fname = tempfile.mkstemp('.h5')

    model.save_weights(fname)

    # delete and recreate model
    del model
    model = Sequential()
    model.add(Dense(2, input_shape=(3,), name='rick'))
    model.add(Dense(3, name='morty'))
    model.compile(loss=custom_loss, optimizer=custom_opt(), metrics=['acc'])

    # load weights from first model
    model.load_weights(fname, by_name=True)
    os.remove(fname)

    out2 = model.predict(x)
    assert_allclose(out, out2, atol=1e-05)
    for i in range(len(model.layers)):
        new_weights = model.layers[i].get_weights()
        for j in range(len(new_weights)):
            assert_allclose(old_weights[i][j], new_weights[j], atol=1e-05)
def test_learning_rate_multipliers_dense():
    '''
    Test learning rate multipliers on Dense layers
    '''
    (X_train, y_train), (X_test, y_test) = get_test_data(nb_train=10,
                                                         nb_test=1,
                                                         input_shape=(4,),
                                                         classification=True,
                                                         nb_class=2)
    y_train = to_categorical(y_train)
    y_test = to_categorical(y_test)

    np.random.seed(seed)
    model0 = Sequential()
    model0.add(keras.layers.Dense(output_dim=5, input_dim=4, activation='relu'))
    model0.add(keras.layers.Dense(output_dim=2, activation='softmax'))
    model0.compile(loss='categorical_crossentropy', optimizer='sgd')
    (m0w0_ini, m0b0_ini) = model0.layers[0].get_weights()
    (m0w1_ini, m0b1_ini) = model0.layers[1].get_weights()
    model0.train_on_batch(X_train, y_train)
    (m0w0_end, m0b0_end) = model0.layers[0].get_weights()
    (m0w1_end, m0b1_end) = model0.layers[1].get_weights()

    np.random.seed(seed)
    model1 = Sequential()
    model1.add(keras.layers.Dense(output_dim=5, input_dim=4, activation='relu',
                                  W_learning_rate_multiplier=0.0,
                                  b_learning_rate_multiplier=0.0))
    model1.add(keras.layers.Dense(output_dim=2, activation='softmax',
                                  W_learning_rate_multiplier=0.5,
                                  b_learning_rate_multiplier=0.5))
    model1.compile(loss='categorical_crossentropy', optimizer='sgd')
    (m1w0_ini, m1b0_ini) = model1.layers[0].get_weights()
    (m1w1_ini, m1b1_ini) = model1.layers[1].get_weights()
    model1.train_on_batch(X_train, y_train)
    (m1w0_end, m1b0_end) = model1.layers[0].get_weights()
    (m1w1_end, m1b1_end) = model1.layers[1].get_weights()

    # This should be ~0.0
    np.testing.assert_almost_equal(np.mean(m1w0_end - m1w0_ini), 0.0, decimal=2)
    np.testing.assert_almost_equal(np.mean(m1b0_end - m1b0_ini), 0.0, decimal=2)
    # This should be ~0.5
    np.testing.assert_almost_equal(np.mean((m1w1_end - m1w1_ini) / (m0w1_end - m0w1_ini)),
                                   0.5, decimal=2)
    np.testing.assert_almost_equal(np.mean((m1b1_end - m1b1_ini) / (m0b1_end - m0b1_ini)),
                                   0.5, decimal=2)
def train_mlp(self, input, output):
    self.in_real = input.data['real']
    self.in_imag = input.data['imag']
    self.out_real = output.data['real']
    self.out_imag = output.data['imag']

    (i_dim_x, i_dim_y, i_dim_z) = self.in_real.shape
    in_dim = i_dim_x * i_dim_y * i_dim_z
    input_data = self.in_real.reshape(in_dim, 1)

    (o_dim_x, o_dim_y, o_dim_z) = self.out_real.shape
    out_dim = o_dim_x * o_dim_y * o_dim_z
    output_data = self.out_real.reshape(out_dim, 1)

    model = Sequential()
    model.add(Dense(200, input_dim=in_dim, init='uniform'))
    model.add(Activation('relu'))
    # model.add(Dropout(0.25))
    model.add(Dense(200))  # , init='uniform'
    model.add(Activation('relu'))
    # model.add(Dropout(0.25))
    model.add(Dense(out_dim))  # , init='uniform'
    model.add(Activation('softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='sgd',
                  metrics=['accuracy'])

    early_stop = EarlyStopping(monitor='val_loss', patience=2)
    hist = model.fit(input_data, output_data, nb_epoch=50,
                     batch_size=64, validation_split=0.2,
                     shuffle=True, callbacks=[early_stop])
    print(hist.history)
    # TODO: batch training via model.train_on_batch()

    # Save model
    model_to_save_json = model.to_json()
    open('model_architecture.json', 'w').write(model_to_save_json)
    model_to_save_yaml = model.to_yaml()
    open('model_architecture.yaml', 'w').write(model_to_save_yaml)
    model.save_weights('weights.h5')
def test_sequential_model_pickling_2():
    # test with custom optimizer, loss
    custom_opt = optimizers.rmsprop
    custom_loss = losses.mse

    model = Sequential()
    model.add(Dense(2, input_shape=(3,)))
    model.add(Dense(3))
    model.compile(loss=custom_loss, optimizer=custom_opt(), metrics=['acc'])

    x = np.random.random((1, 3))
    y = np.random.random((1, 3))
    model.train_on_batch(x, y)

    out = model.predict(x)

    state = pickle.dumps(model)
    model = pickle.loads(state)

    out2 = model.predict(x)
    assert_allclose(out, out2, atol=1e-05)
def test_trainable_argument():
    x = np.random.random((5, 3))
    y = np.random.random((5, 2))

    model = Sequential()
    model.add(Dense(2, input_dim=3, trainable=False))
    model.compile('rmsprop', 'mse')
    out = model.predict(x)
    model.train_on_batch(x, y)
    out_2 = model.predict(x)
    assert_allclose(out, out_2)

    # test with nesting
    input = Input(shape=(3,))
    output = model(input)
    model = Model(input, output)
    model.compile('rmsprop', 'mse')
    out = model.predict(x)
    model.train_on_batch(x, y)
    out_2 = model.predict(x)
    assert_allclose(out, out_2)
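# Related behaviour worth remembering alongside test_trainable_argument: Keras
# collects trainable weights at compile time, so flipping `trainable` on an
# existing layer only takes effect after the model is compiled again. A minimal
# self-contained sketch:
import numpy as np
from keras.models import Sequential
from keras.layers import Dense

frozen = Sequential()
frozen.add(Dense(2, input_dim=3, trainable=False))
frozen.compile('rmsprop', 'mse')

frozen.layers[0].trainable = True  # no effect yet...
frozen.compile('rmsprop', 'mse')   # ...until the model is recompiled

x = np.random.random((5, 3))
y = np.random.random((5, 2))
before = frozen.predict(x)
frozen.train_on_batch(x, y)
assert np.any(before != frozen.predict(x))  # weights now update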
def test_nested_sequential(in_tmpdir):
    (x_train, y_train), (x_test, y_test) = _get_test_data()

    inner = Sequential()
    inner.add(Dense(num_hidden, input_shape=(input_dim,)))
    inner.add(Activation('relu'))
    inner.add(Dense(num_classes))

    middle = Sequential()
    middle.add(inner)

    model = Sequential()
    model.add(middle)
    model.add(Activation('softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='rmsprop')

    model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs,
              verbose=1, validation_data=(x_test, y_test))
    model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs,
              verbose=2, validation_split=0.1)
    model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, verbose=0)
    model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs,
              verbose=1, shuffle=False)

    model.train_on_batch(x_train[:32], y_train[:32])

    loss = model.evaluate(x_test, y_test, verbose=0)

    model.predict(x_test, verbose=0)
    model.predict_classes(x_test, verbose=0)
    model.predict_proba(x_test, verbose=0)

    fname = 'test_nested_sequential_temp.h5'
    model.save_weights(fname, overwrite=True)

    inner = Sequential()
    inner.add(Dense(num_hidden, input_shape=(input_dim,)))
    inner.add(Activation('relu'))
    inner.add(Dense(num_classes))

    middle = Sequential()
    middle.add(inner)

    model = Sequential()
    model.add(middle)
    model.add(Activation('softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
    model.load_weights(fname)
    os.remove(fname)

    nloss = model.evaluate(x_test, y_test, verbose=0)
    assert loss == nloss

    # test serialization
    config = model.get_config()
    Sequential.from_config(config)

    model.summary()
    json_str = model.to_json()
    model_from_json(json_str)

    yaml_str = model.to_yaml()
    model_from_yaml(yaml_str)
def stuff(alpha, nums_hidden, seed, dim, nums_epochs):
    # Config model
    s = {'dataset': 'atis.pkl',
         'lr': alpha,
         'nhidden': nums_hidden,    # number of hidden units
         'batch': seed,
         'emb_dimension': dim,      # dimension of word embedding
         'nepochs': nums_epochs}

    # Make result folder, containing: model file, result.json, output files for
    # validation and train; also save config.json again
    folder = os.path.basename("Result").split('.')[0]
    if not os.path.exists(folder):
        os.mkdir(folder)
    write_JSON(s, folder + '/config.json')

    # load the dataset
    train_set, test_set, dic = load_atis(s['dataset'])

    # Convert from index. The vocabulary of meaningful words and labels is
    # covered by the dic data of the ATIS dataset
    idx2label = dict((k, v) for v, k in dic['labels2idx'].items())
    idx2word = dict((k, v) for v, k in dic['words2idx'].items())

    # Separate data for hold-out: train set, validation set.
    # words2idx, tables2idx and labels2idx (use only vocabs and labels;
    # tables list all meanings of words, which is not necessary)
    train_words, train_tables, train_labels = train_set
    valid_words = train_words[0:499]
    valid_tables = train_tables[0:499]
    valid_labels = train_labels[0:499]
    train_words = train_words[500:len(train_words)]
    train_tables = train_tables[500:len(train_tables)]
    train_labels = train_labels[500:len(train_labels)]

    # Print some info
    # print('Train set:', str(len(train_words)), 'sentences')
    # print('Validation set:', str(len(valid_words)), 'sentences')

    # Some parameters used in the training loop
    vocsize = len(dic['words2idx'])
    nclasses = len(dic['labels2idx'])
    nsentences = len(train_words)
    # print('Nums of vocabulary taken from the words of each sentence:', vocsize)
    # print('Nums of slots:', nclasses)
    # print('Nums of sentences used for training:', nsentences)

    # instantiate the model (seed the RNGs, since batch order affects
    # optimization convergence)
    np.random.seed(s['batch'])
    random.seed(s['batch'])

    # Making model
    model = Sequential()
    model.add(Embedding(vocsize, s['emb_dimension']))        # Word embedding
    model.add(SimpleRNN(s['nhidden'], activation='sigmoid',
                        return_sequences=True))              # Recurrent layer with sigmoid activation
    model.add(TimeDistributed(Dense(output_dim=nclasses)))   # Dense (context) layer, updated per timestep
    model.add(Activation("softmax"))                         # Softmax activation for classification
    adam = Adam(lr=s['lr'], beta_1=0.9, beta_2=0.999,
                epsilon=1e-08, decay=0.0)                    # Adam optimizer (some hyperparameters locked)
    model.compile(loss='categorical_crossentropy', optimizer=adam,
                  metrics=['accuracy'])                      # Loss function: cross-entropy

    # Train
    for e in range(s['nepochs']):
        # shuffle
        shuffle([train_words, train_tables, train_labels], s['batch'])
        s['ce'] = e
        for i in range(nsentences):
            X = np.asarray([train_words[i]])
            Y = to_categorical(np.asarray(train_labels[i])[:, np.newaxis],
                               nclasses)[np.newaxis, :, :]
            if X.shape[1] == 1:
                continue  # bug with X, Y of length 1
            model.train_on_batch(X, Y)

        # Evaluation: map indices back to meaningful words for the output predictions
        # Train
        predictions_train = [map(lambda x: idx2label[x],
                                 model.predict_on_batch(np.asarray([x])).argmax(2)[0])
                             for x in train_words]
        groundtruth_train = [map(lambda x: idx2label[x], y) for y in train_labels]
        words_train = [map(lambda x: idx2word[x], w) for w in train_words]

        # Validate
        predictions_valid = [map(lambda x: idx2label[x],
                                 model.predict_on_batch(np.asarray([x])).argmax(2)[0])
                             for x in valid_words]
        groundtruth_valid = [map(lambda x: idx2label[x], y) for y in valid_labels]
        words_valid = [map(lambda x: idx2word[x], w) for w in valid_words]

        # Evaluation
        valid_error = acc_measurement(predictions_valid, groundtruth_valid,
                                      words_valid, folder + '/current.valid.txt')
        train_error = acc_measurement(predictions_train, groundtruth_train,
                                      words_train, folder + '/current.train.txt')

        # Save weights in 'model_weight.h5' as an HDF5 file (default); they can
        # be loaded with: model.load_weights('model.h5', by_name=False)
        model.save_weights(folder + '/model_weight.h5', overwrite=True)
        # print('MODEL built at epoch =', e, ', error in validation set =', valid_error)
        s['current_valid_error'] = valid_error
        s['current_train_error'] = train_error

        # Make output files
        if os.path.exists(folder + '/valid.txt'):
            os.remove(folder + '/valid.txt')
        if os.path.exists(folder + '/train.txt'):
            os.remove(folder + '/train.txt')
        os.rename(folder + '/current.valid.txt', folder + '/valid.txt')
        os.rename(folder + '/current.train.txt', folder + '/train.txt')

        result = read_JSON('result.json')
        result['validation error'] = float(s['current_valid_error'])
        result['train error'] = float(s['current_train_error'])
        write_JSON(result, folder + '/result.json')

    # Print the final model result
    print('RESULT MODEL built at epoch =', e,
          ', error in validation set =', s['current_valid_error'],
          ', error in train set =', s['current_train_error'])
    print('\n')
def test_convolutional_recurrent_statefulness():
    data_format = 'channels_last'
    return_sequences = False
    inputs = np.random.rand(num_samples, sequence_len,
                            input_num_row, input_num_col,
                            input_channel)

    # Tests for statefulness
    model = Sequential()
    kwargs = {'data_format': data_format,
              'return_sequences': return_sequences,
              'filters': filters,
              'kernel_size': (num_row, num_col),
              'stateful': True,
              'batch_input_shape': inputs.shape,
              'padding': 'same'}
    layer = convolutional_recurrent.ConvLSTM2D(**kwargs)

    model.add(layer)
    model.compile(optimizer='sgd', loss='mse')
    out1 = model.predict(np.ones_like(inputs))

    # train once so that the states change
    model.train_on_batch(np.ones_like(inputs), np.random.random(out1.shape))
    out2 = model.predict(np.ones_like(inputs))

    # if the state is not reset, output should be different
    assert out1.max() != out2.max()

    # check that output changes after states are reset
    # (even though the model itself didn't change)
    layer.reset_states()
    out3 = model.predict(np.ones_like(inputs))
    assert out2.max() != out3.max()

    # check that container-level reset_states() works
    model.reset_states()
    out4 = model.predict(np.ones_like(inputs))
    assert_allclose(out3, out4, atol=1e-5)

    # check that the call to `predict` updated the states
    out5 = model.predict(np.ones_like(inputs))
    assert out4.max() != out5.max()

    # cntk doesn't support eval convolution with static
    # variable, will enable it later
    if K.backend() != 'cntk':
        # check regularizers
        kwargs = {'data_format': data_format,
                  'return_sequences': return_sequences,
                  'kernel_size': (num_row, num_col),
                  'stateful': True,
                  'filters': filters,
                  'batch_input_shape': inputs.shape,
                  'kernel_regularizer': regularizers.L1L2(l1=0.01),
                  'recurrent_regularizer': regularizers.L1L2(l1=0.01),
                  'bias_regularizer': 'l2',
                  'activity_regularizer': 'l2',
                  'kernel_constraint': 'max_norm',
                  'recurrent_constraint': 'max_norm',
                  'bias_constraint': 'max_norm',
                  'padding': 'same'}
        layer = convolutional_recurrent.ConvLSTM2D(**kwargs)
        layer.build(inputs.shape)
        assert len(layer.losses) == 3
        assert layer.activity_regularizer
        output = layer(K.variable(np.ones(inputs.shape)))
        assert len(layer.losses) == 4
        K.eval(output)

    # check dropout
    layer_test(convolutional_recurrent.ConvLSTM2D,
               kwargs={'data_format': data_format,
                       'return_sequences': return_sequences,
                       'filters': filters,
                       'kernel_size': (num_row, num_col),
                       'padding': 'same',
                       'dropout': 0.1,
                       'recurrent_dropout': 0.1},
               input_shape=inputs.shape)

    # check state initialization
    layer = convolutional_recurrent.ConvLSTM2D(filters=filters,
                                               kernel_size=(num_row, num_col),
                                               data_format=data_format,
                                               return_sequences=return_sequences)
    layer.build(inputs.shape)
    x = Input(batch_shape=inputs.shape)
    initial_state = layer.get_initial_state(x)
    y = layer(x, initial_state=initial_state)
    model = Model(x, y)
    assert model.predict(inputs).shape == layer.compute_output_shape(inputs.shape)
# ========================================= train data ========================================= #
k = 3
sentences_list, sentences_properties_list = reader(training_data_path)
total_window, sentences_NE_tag = data_preprocessing(k, sentences_list,
                                                    sentences_properties_list)
x_index = []  # in order to show cost
y_index = []
input_vector = transform_window_form(total_window)
output_y = transform_tag_form(sentences_NE_tag)
for epoch in range(100):
    cost = model.train_on_batch(input_vector, output_y)
    x_index.append(epoch)
    y_index.append(cost)
    print(cost)
plt.plot(x_index, y_index)
# plt.show()

# NOTE: an earlier version trained on a single sample at a time, but the cost
# did not converge, so we adopted the approach above: train for 100 epochs,
# each on all 62404 samples at once.

# ========================================= test data ========================================= #
# get test data
test_sentences_list, test_sentences_properties_list = reader(test_data_path)
test_total_window, test_sentences_NE_tag = data_preprocessing(
    k, test_sentences_list, test_sentences_properties_list)
test_input_vector = transform_window_form(test_total_window)
class Learner(object):
    '''
    This agent jumps randomly.
    '''

    def __init__(self):
        self.last_state = None
        self.last_action = None
        self.last_reward = None
        self.isFirstState = True    # to catch weirdness for first state of game
        self.isSecondState = False  # update gravity on *second* state
        # self.Q = {}  # initialize Q table. use a dictionary for now
        self.gamma = 1  # temporal discount value? finite horizon so maybe we don't need this?
        # self.eps0 = 1  # do random action 5% of the time, for exploration?
        self.eps = 0.2  # start more random
        self.g = 0
        self.alpha = 0.5

        # initialize neural network to random weights
        # this should be optimized still
        self.model = Sequential()
        self.model.add(Dense(output_dim=100, batch_input_shape=(1, 5),
                             init='lecun_uniform'))
        self.model.add(Activation("relu"))
        # two outputs for 2 actions!
        self.model.add(Dense(1, init='lecun_uniform'))
        self.model.add(Activation("linear"))  # linear output so we can have a range of real-valued outputs

        # initialize model
        rms = RMSprop()
        self.model.compile(loss='mse', optimizer=rms)
        # sgd = SGD(lr=0.001)
        # self.model.compile(loss='mean_squared_error', optimizer=sgd)

    def reset(self, ii):
        self.last_state = None
        self.last_action = None
        self.last_reward = None
        self.g = 0
        # decrement epsilon value?
        # self.eps = float(self.eps0) / float(ii + 1)
        self.eps = max([0.01, self.eps - 0.001])  # always do at least some random actions
        self.isFirstState = True
        self.isSecondState = False

    def getFeats(self, state, action):
        # takes state dictionary + action and converts to features
        # to feed into the NN
        v = state['monkey']['vel']
        rx = state['tree']['dist']
        ry = state['monkey']['bot'] - state['tree']['bot']
        h = state['monkey']['bot']
        # coarse-grain position here, if necessary?
        # dx = 1
        # dy = 1
        # rx = np.round(float(rx) / float(dx))
        # ry = np.round(float(ry) / float(dy))
        # h = np.round(float(h) / float(dy))
        # instead: normalize to max values?
        rx = float(rx) / float(300)   # max dist of 300
        ry = float(ry) / float(200)   # max diff here is +/- 200?
        v = float(v) / float(10)      # guessing it's about 10ish max/min?
        g = float(self.g) / float(4)  # 4 values?
        # can also coarse-grain velocity?
        # dv = 1
        # v = np.round(float(v) / float(dv))
        # tmp = [g, v, rx, ry, h]  # 5-dimensional feature vector
        # now: try excluding height
        # also for now: ignore g?
        tmp = [g, v, rx, ry, action]
        # convert to the 1x5 numpy array that the NN expects
        feat = np.ndarray([1, 5])  # 5 or however many features there are
        feat[:] = tmp
        return feat

    def QNet(self, feat):
        # uses a neural network to approximate Q using a state-action pair (s, a)
        return self.model.predict(feat)

    def action_callback(self, state):
        '''
        Implement this function to learn things and take actions.
        Return 0 if you don't want to jump and 1 if you do.
        '''
        # print(state)
        # first, just sit tight if it's the first state
        if self.isFirstState:
            self.last_state = state
            self.last_action = 0
            # self.g = -state['monkey']['vel']
            # self.g = 1  # to train more quickly?
            # don't jump on the first state
            self.isFirstState = False
            self.isSecondState = True
            # self.model.train_on_batch(self.getFeats(self.last_state), 0 * np.random.rand(1, 2))
            return 0

        # if second state, then update gravity
        if self.isSecondState:
            self.g = -state['monkey']['vel']
            self.isSecondState = False

        # You might do some learning here based on the current state and the last state.
        # You'll need to select an action and return it.
        # Return 0 to swing and 1 to jump.

        # find Q values for old state
        Q0 = self.QNet(self.getFeats(self.last_state, self.last_action))
        # and for new state!
        Qstay = self.QNet(self.getFeats(state, 0))
        Qjump = self.QNet(self.getFeats(state, 1))
        # take max
        if Qjump > Qstay:  # if Q value higher for jumping
            new_action = 1
            Qmax = Qjump
        else:              # otherwise, don't jump
            new_action = 0
            Qmax = Qstay
        # print(Qstay, Qjump, new_action)

        # update target vector?
        Qtarg = self.last_reward + Qmax
        # gradient descent to update weights
        # self.model.fit(self.getFeats(self.last_state), Qtarg, batch_size=1, nb_epoch=1, verbose=0)
        self.model.train_on_batch(self.getFeats(self.last_state, self.last_action), Qtarg)

        # epsilon greedy: with probability epsilon, overwrite new action with a random one
        if npr.rand() < self.eps:
            # then choose randomly
            new_action = npr.rand() < 0.5

        # update last action and state
        self.last_action = new_action
        self.last_state = state
        # and return action
        return self.last_action

    def reward_callback(self, reward):
        '''This gets called so you can see what reward you get.'''
        self.last_reward = reward
def main(): start_time = time.time() parser = argparse.ArgumentParser( prog='trainLSTM_MLP.py', description='Train LSTM-MLP model for visual question answering') parser.add_argument('--mlp-hidden-units', type=int, default=1024, metavar='<mlp-hidden-units>') parser.add_argument('--lstm-hidden-units', type=int, default=512, metavar='<lstm-hidden-units>') parser.add_argument('--mlp-hidden-layers', type=int, default=3, metavar='<mlp-hidden-layers>') parser.add_argument('--lstm-hidden-layers', type=int, default=1, metavar='<lstm-hidden-layers>') parser.add_argument('--dropout', type=float, default=0.5, metavar='<dropout-rate>') parser.add_argument('--mlp-activation', type=str, default='tanh', metavar='<activation-function>') parser.add_argument('--num-epochs', type=int, default=100, metavar='<num-epochs>') parser.add_argument('--batch-size', type=int, default=128, metavar='<batch-size>') parser.add_argument('--learning-rate', type=float, default=0.001, metavar='<learning-rate>') parser.add_argument('--dev-accuracy-path', type=str, required=True, metavar='<accuracy-path>') args = parser.parse_args() word_vec_dim = 300 img_dim = 2048 max_len = 30 cap_max_len = 100 ###################### # Load Data # ###################### data_dir = '/home/mlds/data/0.05_val/' print('Loading data...') train_q_ids, train_image_ids = LoadIds('train', data_dir) dev_q_ids, dev_image_ids = LoadIds('dev', data_dir) train_questions = LoadQuestions('train', data_dir) dev_questions = LoadQuestions('dev', data_dir) train_choices = LoadChoices('train', data_dir) dev_choices = LoadChoices('dev', data_dir) train_answers = LoadAnswers('train', data_dir) dev_answers = LoadAnswers('dev', data_dir) caption_map = LoadCaptions('train') print('Finished loading data.') print('Time: %f s' % (time.time() - start_time)) ###################### # Model Descriptions # ###################### print('Generating and compiling model...') # image model (CNN features) image_model = Sequential() image_model.add(Reshape(input_shape=(img_dim, ), dims=(img_dim, ))) # language model (LSTM) language_model = Sequential() if args.lstm_hidden_layers == 1: language_model.add( LSTM(output_dim=args.lstm_hidden_units, return_sequences=False, input_shape=(max_len, word_vec_dim))) else: language_model.add( LSTM(output_dim=args.lstm_hidden_units, return_sequences=True, input_shape=(max_len, word_vec_dim))) for i in range(args.lstm_hidden_layers - 2): language_model.add( LSTM(output_dim=args.lstm_hidden_units, return_sequences=True)) language_model.add( LSTM(output_dim=args.lstm_hidden_units, return_sequences=False)) # caption model (LSTM) caption_model = Sequential() if args.lstm_hidden_layers == 1: caption_model.add( LSTM(output_dim=args.lstm_hidden_units, return_sequences=False, input_shape=(cap_max_len, word_vec_dim))) else: caption_model.add( LSTM(output_dim=args.lstm_hidden_units, return_sequences=True, input_shape=(cap_max_len, word_vec_dim))) for i in range(args.lstm_hidden_layers - 2): caption_model.add( LSTM(output_dim=args.lstm_hidden_units, return_sequences=True)) caption_model.add( LSTM(output_dim=args.lstm_hidden_units, return_sequences=False)) # feedforward model (MLP) model = Sequential() model.add( Merge([language_model, caption_model, image_model], mode='concat', concat_axis=1)) for i in range(args.mlp_hidden_layers): model.add(Dense(args.mlp_hidden_units, init='uniform')) model.add(Activation(args.mlp_activation)) model.add(Dropout(args.dropout)) model.add(Dense(word_vec_dim)) #model.add(Activation('softmax')) json_string = model.to_json() 
model_filename = 'models/inception_lstm_units_%i_layers_%i_mlp_units_%i_layers_%i_%s_lr%.1e_dropout%.2f.caption' % ( args.lstm_hidden_units, args.lstm_hidden_layers, args.mlp_hidden_units, args.mlp_hidden_layers, args.mlp_activation, args.learning_rate, args.dropout) open(model_filename + '.json', 'w').write(json_string) # loss and optimizer rmsprop = RMSprop(lr=args.learning_rate) #model.compile(loss='categorical_crossentropy', optimizer=rmsprop) model.compile(loss=Loss, optimizer=rmsprop) print('Compilation finished.') print('Time: %f s' % (time.time() - start_time)) ######################################## # Load CNN Features and Word Vectors # ######################################## # load Inception features print('Loading Inception features...') INC_features, img_map = LoadInceptionFeatures() print('Inception features loaded') print('Time: %f s' % (time.time() - start_time)) # load GloVe vectors print('Loading GloVe vectors...') word_embedding, word_map = LoadGloVe() print('GloVe vectors loaded') print('Time: %f s' % (time.time() - start_time)) ###################### # Make Batches # ###################### print('Making batches...') # training batches train_question_batches = [ b for b in MakeBatches( train_questions, args.batch_size, fillvalue=train_questions[-1]) ] train_answer_batches = [ b for b in MakeBatches(train_answers['toks'], args.batch_size, fillvalue=train_answers['toks'][-1]) ] train_image_batches = [ b for b in MakeBatches( train_image_ids, args.batch_size, fillvalue=train_image_ids[-1]) ] #train_qid_batches = [ b for b in MakeBatches(train_q_ids, args.batch_size, fillvalue=train_q_ids[-1]) ] train_indices = list(range(len(train_question_batches))) # validation batches dev_question_batches = [ b for b in MakeBatches( dev_questions, args.batch_size, fillvalue=dev_questions[-1]) ] dev_answer_batches = [ b for b in MakeBatches(dev_answers['labs'], args.batch_size, fillvalue=dev_answers['labs'][-1]) ] dev_choice_batches = [ b for b in MakeBatches( dev_choices, args.batch_size, fillvalue=dev_choices[-1]) ] #dev_qid_batches = [ b for b in MakeBatches(dev_q_ids, args.batch_size, fillvalue=dev_q_ids[-1]) ] dev_image_batches = [ b for b in MakeBatches( dev_image_ids, args.batch_size, fillvalue=dev_image_ids[-1]) ] print('Finished making batches.') print('Time: %f s' % (time.time() - start_time)) ###################### # Training # ###################### acc_file = open(args.dev_accuracy_path, 'w') dev_accs = [] max_acc = -1 max_acc_epoch = -1 # define interrupt handler def PrintDevAcc(): print('Max validation accuracy epoch: %i' % max_acc_epoch) print(dev_accs) def InterruptHandler(sig, frame): print(str(sig)) PrintDevAcc() sys.exit(-1) signal.signal(signal.SIGINT, InterruptHandler) signal.signal(signal.SIGTERM, InterruptHandler) # print training information print('-' * 80) print('Training Information') print('# of LSTM hidden units: %i' % args.lstm_hidden_units) print('# of LSTM hidden layers: %i' % args.lstm_hidden_layers) print('# of MLP hidden units: %i' % args.mlp_hidden_units) print('# of MLP hidden layers: %i' % args.mlp_hidden_layers) print('Dropout: %f' % args.dropout) print('MLP activation function: %s' % args.mlp_activation) print('# of training epochs: %i' % args.num_epochs) print('Batch size: %i' % args.batch_size) print('Learning rate: %f' % args.learning_rate) print('# of train questions: %i' % len(train_questions)) print('# of dev questions: %i' % len(dev_questions)) print('-' * 80) acc_file.write('-' * 80 + '\n') acc_file.write('Training Information\n') 
acc_file.write('# of LSTM hidden units: %i\n' % args.lstm_hidden_units) acc_file.write('# of LSTM hidden layers: %i\n' % args.lstm_hidden_layers) acc_file.write('# of MLP hidden units: %i\n' % args.mlp_hidden_units) acc_file.write('# of MLP hidden layers: %i\n' % args.mlp_hidden_layers) acc_file.write('Dropout: %f\n' % args.dropout) acc_file.write('MLP activation function: %s\n' % args.mlp_activation) acc_file.write('# of training epochs: %i\n' % args.num_epochs) acc_file.write('Batch size: %i\n' % args.batch_size) acc_file.write('Learning rate: %f\n' % args.learning_rate) acc_file.write('# of train questions: %i\n' % len(train_questions)) acc_file.write('# of dev questions: %i\n' % len(dev_questions)) acc_file.write('-' * 80 + '\n') # start training print('Training started...') for k in range(args.num_epochs): print('-' * 80) print('Epoch %i' % (k + 1)) progbar = generic_utils.Progbar(len(train_indices) * args.batch_size) # shuffle batch indices random.shuffle(train_indices) for i in train_indices: X_question_batch = GetQuestionsTensor(train_question_batches[i], word_embedding, word_map) X_image_batch = GetImagesMatrix(train_image_batches[i], img_map, INC_features) X_caption_batch = GetCaptionsTensor(train_image_batches[i], word_embedding, word_map, caption_map) Y_answer_batch = GetAnswersMatrix(train_answer_batches[i], word_embedding, word_map) loss = model.train_on_batch( [X_question_batch, X_caption_batch, X_image_batch], Y_answer_batch) loss = loss[0].tolist() progbar.add(args.batch_size, values=[('train loss', loss)]) print('Time: %f s' % (time.time() - start_time)) # evaluate on dev set pbar = generic_utils.Progbar( len(dev_question_batches) * args.batch_size) dev_correct = 0 # feed forward for i in range(len(dev_question_batches)): X_question_batch = GetQuestionsTensor(dev_question_batches[i], word_embedding, word_map) X_image_batch = GetImagesMatrix(dev_image_batches[i], img_map, INC_features) X_caption_batch = GetCaptionsTensor(dev_image_batches[i], word_embedding, word_map, caption_map) prob = model.predict_proba( [X_question_batch, X_caption_batch, X_image_batch], args.batch_size, verbose=0) # get word vecs of choices choice_feats = GetChoicesTensor(dev_choice_batches[i], word_embedding, word_map) similarity = np.zeros((5, args.batch_size), float) # calculate cosine similarities for j in range(5): similarity[j] = np.diag( cosine_similarity(prob, choice_feats[j])) # take argmax of cosine similarities pred = np.argmax(similarity, axis=0) + 1 if i != (len(dev_question_batches) - 1): dev_correct += np.count_nonzero(dev_answer_batches[i] == pred) else: num_padding = args.batch_size * len( dev_question_batches) - len(dev_questions) last_idx = args.batch_size - num_padding dev_correct += np.count_nonzero( dev_answer_batches[i][:last_idx] == pred[:last_idx]) pbar.add(args.batch_size) dev_acc = float(dev_correct) / len(dev_questions) dev_accs.append(dev_acc) print('Validation Accuracy: %f' % dev_acc) print('Time: %f s' % (time.time() - start_time)) if dev_acc > max_acc: max_acc = dev_acc max_acc_epoch = k model.save_weights(model_filename + '_best.hdf5', overwrite=True) #model.save_weights(model_filename + '_epoch_{:03d}.hdf5'.format(k+1)) print(dev_accs) for acc in dev_accs: acc_file.write('%f\n' % acc) print('Best validation accuracy: %f; epoch#%i' % (max_acc, (max_acc_epoch + 1))) acc_file.write('Best validation accuracy: %f; epoch#%i\n' % (max_acc, (max_acc_epoch + 1))) print('Training finished.') acc_file.write('Training finished.\n') print('Time: %f s' % (time.time() - start_time)) 
acc_file.write('Time: %f s\n' % (time.time() - start_time)) acc_file.close()
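# A minimal, self-contained sketch of the answer-selection step used in the dev
# loop above: the model predicts an answer embedding, and the multiple-choice
# option whose word vector is most cosine-similar to it is picked. The 5-choice
# setup and the diag-of-similarity trick follow the snippet; the random arrays
# below are placeholders, not real data.
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

def pick_choices(pred_embeddings, choice_feats):
    """pred_embeddings: (batch, dim); choice_feats: (5, batch, dim).
    Returns the 1-based index of the best-matching choice per example."""
    similarity = np.zeros((5, pred_embeddings.shape[0]))
    for j in range(5):
        # diag keeps each prediction's similarity with its own j-th choice
        similarity[j] = np.diag(cosine_similarity(pred_embeddings, choice_feats[j]))
    return np.argmax(similarity, axis=0) + 1

print(pick_choices(np.random.rand(4, 300), np.random.rand(5, 4, 300)))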
def main(): parser = argparse.ArgumentParser() parser.add_argument('-num_hidden_units_mlp', type=int, default=1024) parser.add_argument('-num_hidden_units_lstm', type=int, default=512) parser.add_argument('-num_hidden_layers_mlp', type=int, default=3) parser.add_argument('-dropout', type=float, default=0.5) parser.add_argument('-activation_mlp', type=str, default='tanh') #TODO Feature parser.add_argument('-language_only', type=bool, default=False) args = parser.parse_args() word_vec_dim = 300 img_dim = 4096 max_len = 30 nb_classes = 1000 #get the data questions_train = open('../data/preprocessed/questions_train2014.txt', 'r').read().decode('utf8').splitlines() questions_lengths_train = open( '../data/preprocessed/questions_lengths_train2014.txt', 'r').read().decode('utf8').splitlines() answers_train = open('../data/preprocessed/answers_train2014.txt', 'r').read().decode('utf8').splitlines() images_train = open('../data/preprocessed/images_train2014.txt', 'r').read().decode('utf8').splitlines() vgg_model_path = '../features/coco/vgg_feats.mat' maxAnswers = 1000 questions_train, answers_train, images_train = selectFrequentAnswers( questions_train, answers_train, images_train, maxAnswers) questions_lengths_train, questions_train, answers_train, images_train = ( list(t) for t in zip(*sorted( zip(questions_lengths_train, questions_train, answers_train, images_train)))) #encode the remaining answers labelencoder = preprocessing.LabelEncoder() labelencoder.fit(answers_train) nb_classes = len(list(labelencoder.classes_)) joblib.dump(labelencoder, '../models/labelencoder.pkl') #defining our LSTM based model image_model = Sequential() image_model.add(Reshape(input_shape=(img_dim, ), dims=(img_dim, ))) #print image_model.output_shape #512 hidden units in LSTM layer. 300-dimensional word vectors. language_model = Sequential() language_model.add( LSTM(output_dim=args.num_hidden_units_lstm, return_sequences=False, input_shape=(max_len, word_vec_dim))) #print language_model.output_shape model = Sequential() model.add( Merge([language_model, image_model], mode='concat', concat_axis=1)) print model.output_shape for i in xrange(args.num_hidden_layers_mlp): model.add(Dense(args.num_hidden_units_mlp, init='uniform')) model.add(Activation(args.activation_mlp)) model.add(Dropout(args.dropout)) model.add(Dense(nb_classes)) model.add(Activation('softmax')) json_string = model.to_json() model_file_name = '../models/lstm_1_num_hidden_units_lstm_' + str( args.num_hidden_units_lstm) + '_num_hidden_units_mlp_' + str( args.num_hidden_units_mlp) + '_num_hidden_layers_mlp_' + str( args.num_hidden_layers_mlp) open(model_file_name + '.json', 'w').write(json_string) model.compile(loss='categorical_crossentropy', optimizer='rmsprop') print 'Compilation done' features_struct = scipy.io.loadmat(vgg_model_path) VGGfeatures = features_struct['feats'] print 'loaded vgg features' image_ids = open('../features/coco/coco_vgg_IDMap.txt').read().splitlines() img_map = {} for ids in image_ids: id_split = ids.split() img_map[id_split[0]] = int(id_split[1]) nlp = English() print 'loaded word2vec features...' ## training print 'Training started...' 
numEpochs = 100 model_save_interval = 10 batchSize = 128 for k in xrange(numEpochs): progbar = generic_utils.Progbar(len(questions_train)) for qu_batch, an_batch, im_batch in zip( grouper(questions_train, batchSize, fillvalue=questions_train[0]), grouper(answers_train, batchSize, fillvalue=answers_train[0]), grouper(images_train, batchSize, fillvalue=images_train[0])): timesteps = len(nlp( qu_batch[-1])) #questions sorted in descending order of length X_q_batch = get_questions_tensor_timeseries( qu_batch, nlp, timesteps) X_i_batch = get_images_matrix(im_batch, img_map, VGGfeatures) Y_batch = get_answers_matrix(an_batch, labelencoder) loss = model.train_on_batch([X_q_batch, X_i_batch], Y_batch) progbar.add(batchSize, values=[("train loss", loss)]) if k % model_save_interval == 0: model.save_weights(model_file_name + '_epoch_{:03d}.hdf5'.format(k)) model.save_weights(model_file_name + '_epoch_{:03d}.hdf5'.format(k))
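# Note on the '-language_only' flag defined above: argparse's type=bool does not
# parse the string "False" as False (any non-empty string is truthy), so passing
# '-language_only False' on the command line would still enable the flag. A
# common fix is a store_true action; a standalone sketch:
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('-language_only', action='store_true',
                    help='train on question text only, ignoring image features')
print(parser.parse_args([]).language_only)                  # False unless the flag is given
print(parser.parse_args(['-language_only']).language_only)  # True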
def train(self, list_sigs): # Load train data (=ga result). X_train = [] X_train = np.array(list_sigs) X_train = (X_train.astype(np.float32) - self.flt_size) / self.flt_size # Build discriminator. discriminator = self.discriminator_model() d_opt = SGD(lr=0.1, momentum=0.1, decay=1e-5) discriminator.compile(loss='binary_crossentropy', optimizer=d_opt) # Build generator and discriminator (fixed weight of discriminator). discriminator.trainable = False self.generator = self.generator_model() dcgan = Sequential([self.generator, discriminator]) g_opt = SGD(lr=0.1, momentum=0.3) dcgan.compile(loss='binary_crossentropy', optimizer=g_opt) # Execute train. num_batches = int(len(X_train) / self.batch_size) lst_scripts = [] for epoch in range(self.num_epoch): for batch in range(num_batches): # Create noise for inputting to generator. noise = np.array([ np.random.uniform(-1, 1, self.input_size) for _ in range(self.batch_size) ]) # Generate new injection code using noise. generated_codes = self.generator.predict(noise, verbose=0) # Update weight of discriminator. image_batch = X_train[batch * self.batch_size:(batch + 1) * self.batch_size] X = image_batch y = [random.uniform(0.7, 1.2) for _ in range(self.batch_size)] d_loss = discriminator.train_on_batch(X, y) X = generated_codes y = [random.uniform(0.0, 0.3) for _ in range(self.batch_size)] d_loss = discriminator.train_on_batch(X, y) # Update weight of generator. noise = np.array([ np.random.uniform(-1, 1, self.input_size) for _ in range(self.batch_size) ]) g_loss = dcgan.train_on_batch(noise, [1] * self.batch_size) # Build HTML syntax from generated codes. for generated_code in generated_codes: lst_genom = [] for gene_num in generated_code: gene_num = (gene_num * self.flt_size) + self.flt_size gene_num = int(np.round(gene_num)) if gene_num == len(self.df_genes): gene_num -= 1 lst_genom.append(int(gene_num)) str_html = self.util.transform_gene_num2str( self.df_genes, lst_genom) self.util.print_message( OK, 'Train GAN : epoch={}, batch={}, g_loss={}, d_loss={}, {} ({})' .format( epoch, batch, g_loss, d_loss, np.round((generated_code * self.flt_size) + self.flt_size), str_html)) # Evaluate generated injection code. for eval_place in self.eval_place_list: # Build html syntax. html = self.template.render({eval_place: str_html}) with codecs.open(self.eval_html_path, 'w', encoding='utf-8') as fout: fout.write(html) # Evaluate individual using selenium. selenium_score, error_flag = self.util.check_individual_selenium( self.obj_browser, self.eval_html_path) if error_flag: continue # Check generated individual using selenium. if selenium_score > 0: self.util.print_message( WARNING, 'Detect running script: "{}" in {}.'.format( str_html, eval_place)) # Save running script. lst_scripts.append([eval_place, str_html]) # Save weights of network each epoch. self.generator.save_weights( self.util.join_path( self.weight_dir, self.gen_weight_file.replace('*', str(epoch)))) discriminator.save_weights( self.util.join_path( self.weight_dir, self.dis_weight_file.replace('*', str(epoch)))) return lst_scripts
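# The discriminator updates above use noisy "soft" labels (reals drawn from
# U(0.7, 1.2), fakes from U(0.0, 0.3)) instead of hard 1/0 targets, a common
# GAN stabilisation trick (label smoothing plus noise). A sketch of just that
# labelling step, with batch_size assumed; real_batch/generated_batch are
# placeholders for the arrays used above:
import random
import numpy as np

batch_size = 32
real_labels = np.array([random.uniform(0.7, 1.2) for _ in range(batch_size)])
fake_labels = np.array([random.uniform(0.0, 0.3) for _ in range(batch_size)])
# d_loss_real = discriminator.train_on_batch(real_batch, real_labels)
# d_loss_fake = discriminator.train_on_batch(generated_batch, fake_labels)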
nb_epochs = 10 # you probably want to go longer than this batch_size = 256 fig = plt.figure() try: for e in range(nb_epochs): print('-'*40) #progbar = generic_utils.Progbar(X_train.shape[0]) for b in range(150): #print(b) f = b * batch_size l = (b+1) * batch_size X_batch = X_train[f:l].astype('float32') y_batch = y_train[f:l].astype('float32') loss = model.train_on_batch(X_batch, y_batch) #print(loss) #progbar.add(X_batch.shape[0], values=[("train loss", loss)]) scorev = model.evaluate(X_valid, y_valid, verbose=1) scoret = model.evaluate(X_test, y_test, verbose=1) print('Epoch: {0} | Valid: {1} | Test: {2}'.format(e, scorev, scoret)) if e % 1 == 0: Xresult = F([X_batch[:9]]) plt.clf() for i in range(9): plt.subplot(3, 3, i+1) image = np.squeeze(Xresult[0][i]) plt.imshow(image, cmap='gray') plt.axis('off') fig.canvas.draw()
X_supervised_samples_from_0 = np.asarray(train_data_0[ix_0]) Y_supervised_samples_from_0 = np.asarray(train_target_0[ix_0]) ix_1 = np.random.randint(0, len(train_data_1), int(BATCH_SIZE / 2)) X_supervised_samples_from_1 = np.asarray(train_data_1[ix_1]) Y_supervised_samples_from_1 = np.asarray(train_target_1[ix_1]) Xsup_real = np.concatenate( (X_supervised_samples_from_0, X_supervised_samples_from_1), axis=0) ysup_real = np.concatenate( (Y_supervised_samples_from_0, Y_supervised_samples_from_1), axis=0) # update supervised discriminator (c) c_loss, c_acc = lstm.train_on_batch(Xsup_real, ysup_real) if (i + 1) % (BATCH_NUM * 1) == 0: epoch += 1 print(f"Epoch {epoch}, c model accuracy on training data: {c_acc}") _, test_acc = lstm.evaluate(test_data, test_target, verbose=0) print(f"Epoch {epoch}, c model accuracy on test data: {test_acc}") y_pred = lstm.predict(test_data, batch_size=60, verbose=0) pred_list = y_pred.tolist() for i in range(len(pred_list)): for j in range(5): if pred_list[i][j] > [0.5]: pred_list[i][j] = [1] else: pred_list[i][j] = [0]
met_curve = np.zeros([0, 4]) start = time.time() for epoch in range(1, sys.maxsize): print("epoch: {0}".format(epoch)) np.random.shuffle(X_train) rnd = create_random_features(len(X_train)) # train on batch for i in range(math.ceil(len(X_train) / batch_size)): print("batch:", i, end='\r') X_batch = X_train[i * batch_size:(i + 1) * batch_size] rnd_batch = rnd[i * batch_size:(i + 1) * batch_size] loss_g, acc_g = dcgan.train_on_batch(rnd_batch, [0] * len(rnd_batch)) generated = generator.predict(rnd_batch) X = np.append(X_batch, generated, axis=0) y = [0] * len(X_batch) + [1] * len(generated) loss_d, acc_d = discriminator.train_on_batch(X, y) met_curve = np.append(met_curve, [[loss_d, acc_d, loss_g, acc_g]], axis=0) # output val_loss, faked = dcgan.evaluate(rnd_test, [0] * test_num) print("epoch end:") print("d: loss: {0:.3e} acc: {1:.3f}".format(loss_d, acc_d)) print("g: loss: {0:.3e} acc: {1:.3f}".format(loss_g, acc_g)) print("faked: {0}".format(faked))
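# The loop above grows met_curve with np.append, which reallocates and copies
# the whole array on every batch. Collecting rows in a Python list and
# converting once is equivalent and cheaper for long runs; a sketch with
# placeholder metric values:
import numpy as np

rows = []
for loss_d, acc_d, loss_g, acc_g in [(0.71, 0.52, 0.69, 0.48)]:
    rows.append([loss_d, acc_d, loss_g, acc_g])
met_curve = np.asarray(rows)   # one conversion at the end
print(met_curve.shape)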
if not os.path.exists(output_dir): os.makedirs(output_dir) All_class_files = listdir(AllClassPath) All_class_files.sort() loss_graph = [] num_iters = 20000 total_iterations = 0 batchsize = 60 time_before = datetime.now() for it_num in range(num_iters): AbnormalPath = os.path.join(AllClassPath, All_class_files[0]) # Path of abnormal already computed C3D features NormalPath = os.path.join(AllClassPath, All_class_files[1]) # Path of Normal already computed C3D features inputs, targets = load_dataset_Train_batch(AbnormalPath, NormalPath) # Load normal and abnormal video C3D features batch_loss = model.train_on_batch(inputs, targets) loss_graph = np.hstack((loss_graph, batch_loss)) total_iterations += 1 if total_iterations % 20 == 1: print "Iteration " + str(total_iterations) + " took: " + str(datetime.now() - time_before) + ", with loss of " + str(batch_loss) iteration_path = output_dir + 'Iterations_graph_' + str(total_iterations) + '.mat' savemat(iteration_path, dict(loss_graph=loss_graph)) if total_iterations % 1000 == 0: # Save the model every 1000 iterations. weights_path = output_dir + 'weightsAnomalyL1L2_' + str(total_iterations) + '.mat' save_model(model, model_path, weights_path) save_model(model, model_path, weights_path)
class ExpectedSarsa(ExperienceReplay): """ Args: discount: discount factor """ def __init__(self, network_parameter, max_memory=100, discount=.99, n_steps=1, alpha=.1, num_actions=3, epsilon=.1): super().__init__(max_memory, discount) self.n_steps = n_steps self.alpha = alpha self.num_actions = num_actions self.epsilon = epsilon self.network_parameter = network_parameter self.model = Sequential() self.model.add(Dense(network_parameter.hidden_size, input_shape=(network_parameter.input_size, ), activation=network_parameter.activation_function)) # for i in range(network_parameter.n_hidden_layer): self.model.add(Dense(units=network_parameter.hidden_size, activation=network_parameter.activation_function)) self.model.add(Dropout(rate=.1, noise_shape=None, seed=None)) self.model.add(Dense(units=network_parameter.hidden_size, activation=network_parameter.activation_function)) self.model.add(Dense(network_parameter.num_actions)) self.model.compile(sgd(lr=0.01, momentum=0.09), loss='mse') # rmsprop(lr=0.003) """ predicting [[ -1.55888324e-09 6.32959574e-10 -8.20817991e-09]] of state [[-0.49917767 0.00082233]] """ def cumulative_rewards(self, state, next_time_steps, game_over): state_t, action_t, reward_t, _ = state # sum all reward cumulative_rewards = sum([(row[0][2] * (self.discount ** idx)) for idx, row in enumerate(next_time_steps)]) # game is not over, choose epsilon greedy action if game_over is False: state_tp1 = next_time_steps[-1][0][3] outcome = self.model.predict(state_tp1)[0] e_policy = epsilon_greedy_policy(self.num_actions, np.argmax(outcome), self.epsilon) cumulative_rewards += (self.discount ** self.n_steps) * np.dot(outcome, e_policy) # leave it to network # targets[0, action_t] = targets[0, action_t] + self.alpha * (G - targets[0, action_t]) return cumulative_rewards def train_on_batch(self, inputs, targets): return self.model.train_on_batch(inputs, targets) def get_action(self, state): if np.random.rand() <= self.epsilon: action = np.random.randint(0, self.num_actions, size=1)[0] else: q = self.model.predict(state) action = np.argmax(q[0]) return action def get_batch(self, batch_size=10): self.positive_sample = False len_memory = len(self.memory) num_actions = self.model.output_shape[-1] # env_dim = self.memory[0][0][0].shape[1] env_dim = self.memory[0][0][0].shape[1] inputs = np.zeros((min(len_memory, batch_size), env_dim)) targets = np.zeros((inputs.shape[0], num_actions)) """ sample_distribution = [] if self.positive_sample is False: sample_distribution = self.memory[:][0] len_memory = len(sample_distribution) """ # is_print = False for i, idx in enumerate(np.random.randint(0, len_memory, size=inputs.shape[0])): state_t, action_t, reward_t, state_tp1 = self.memory[idx][0] game_over = self.memory[idx][1] inputs[i: i + 1] = state_t # There should be no target values for actions not taken. 
# Thou shalt not correct actions not taken #deep targets[i] = self.model.predict(state_t)[0] # np.max(model.predict(state_tp1)[0]) if game_over: # if game_over is True targets[i, action_t] = reward_t # is_print = True else: # reward_t + gamma * max_a' Q(s', a') next_time_steps = self.memory[idx: idx + self.n_steps + 1] game_over_states = np.array([x[1] for x in next_time_steps]) if len(game_over_states[game_over_states == True]) > 0: # print("game over #########################################") game_over = True idx_game_over = np.argmax(game_over_states == True) Q_sa = self.cumulative_rewards(self.memory[idx][0], next_time_steps[0: idx_game_over + 1], game_over) targets[i, action_t] = reward_t + self.discount * Q_sa # if is_print: # print(inputs, targets) return inputs, targets
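# epsilon_greedy_policy is called in cumulative_rewards above but not defined
# in this snippet. A plausible implementation, assuming it returns the
# probability of each action under an epsilon-greedy policy (the expectation
# weights used by Expected SARSA):
import numpy as np

def epsilon_greedy_policy(num_actions, greedy_action, epsilon):
    probs = np.full(num_actions, epsilon / num_actions)  # explore uniformly
    probs[greedy_action] += 1.0 - epsilon                # rest on the greedy action
    return probs

q_values = np.array([0.1, 0.7, 0.2])
print(epsilon_greedy_policy(3, np.argmax(q_values), 0.1))  # [0.033.. 0.933.. 0.033..]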
X = np.linspace(-1, 1, 200) np.random.shuffle(X) # randomize the data Y = 0.5 * X + 2 + np.random.normal(0, 0.05, (200, )) # plot data plt.scatter(X, Y) plt.show() X_train, Y_train = X[:160], Y[:160] # first 160 data points X_test, Y_test = X[160:], Y[160:] # last 40 data points # build a neural network from the 1st layer to the last layer model = Sequential() model.add(Dense(units=1, input_dim=1)) # choose loss function and optimizing method model.compile(loss='mse', optimizer='sgd') # training print('Training -----------') for step in range(301): cost = model.train_on_batch(X_train, Y_train) if step % 100 == 0: print('train cost: ', cost) # test print('\nTesting ------------') cost = model.evaluate(X_test, Y_test, batch_size=40) print('test cost:', cost) W, b = model.layers[0].get_weights() print('Weights=', W, '\nbiases=', b) # plotting the prediction Y_pred = model.predict(X_test) plt.scatter(X_test, Y_test) plt.plot(X_test, Y_pred) plt.show()
print('done model construction') model.compile(loss='categorical_crossentropy', optimizer='Adadelta') print('done compile') scoring = theano.function(x_test, y_score, allow_input_downcast=True, mode=None) #history = model.fit([user ,Items] ,y_train, nb_epoch=15, batch_size=2048, verbose=2, show_accuracy=True) for i in range(0, 30): print("itr", i) for j in range(0, int(samples / batchsize + .05)): print("batch", j) user, Items, y_train = readbatch() history = model.train_on_batch( [user, Items], y_train, accuracy=True ) # nb_epoch=10, batch_size=1024, verbose=2, show_accuracy=True) curline = 0 print('done training') load_dataset( r"D:\users\t-alie\Deepfactorization\movielens.userstest_50p_5min_centeresMean_manyNeg.2048.centered", r"D:\users\t-alie\Deepfactorization\movielens.itemstest_50p_5min_centeresMean_manyNeg", r"D:\users\t-alie\Deepfactorization\movielens.itemstest_50p_5min_centeresMean_manyNeg.fakeneg" ) pfile = open(r"D:\users\t-alie\Deepfactorization\yp_cos.batch", "w") for j in range(0, int(samples / batchsize + .05)): print("testing batch", j) user, Items, y_train = readbatch() y_p = model.custom_predict([user, Items], scoring) for y in y_p:
model.add( LSTM( batch_input_shape=(BATCH_SIZE, TIME_STEPS, INPUT_SIZE), output_dim=CELL_SIZE, return_sequences=True, stateful=True, )) #add output layer model.add(TimeDistributed(Dense(OUTPUT_SIZE))) adam = Adam(LR) model.compile( optimizer=adam, loss='mse', ) print('Training ------------') for step in range(501): # data shape = (batch_num, steps, inputs/outputs) X_batch, Y_batch, xs = get_batch() cost = model.train_on_batch(X_batch, Y_batch) pred = model.predict(X_batch, BATCH_SIZE) plt.plot(xs[0, :], Y_batch[0].flatten(), 'r', xs[0, :], pred.flatten()[:TIME_STEPS], 'b--') plt.ylim((-1.2, 1.2)) plt.draw() plt.pause(0.1) if step % 10 == 0: print('train cost: ', cost)
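# The model above is stateful (stateful=True with a fixed batch_input_shape),
# so LSTM states carry over between successive train_on_batch calls; that only
# makes sense when consecutive batches continue the same sequences. A minimal
# sketch of the reset pattern between unrelated sequences, with toy shapes:
import numpy as np
from keras.models import Sequential
from keras.layers import LSTM, Dense, TimeDistributed

BATCH, STEPS, FEATS = 4, 10, 1
m = Sequential()
m.add(LSTM(8, batch_input_shape=(BATCH, STEPS, FEATS),
           return_sequences=True, stateful=True))
m.add(TimeDistributed(Dense(1)))
m.compile(optimizer='adam', loss='mse')

x = np.random.rand(BATCH, STEPS, FEATS)
y = np.random.rand(BATCH, STEPS, 1)
m.train_on_batch(x, y)   # states persist into the next call
m.reset_states()         # clear them before feeding an independent sequence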
states.append(state) target = np.copy(ls.move(2, random_move)) targets.append(target) if epoch > 23: pass #print(state) #print(target) #print("-----") winner = ls.winner() if winner > 0: print("Player {0} won.".format(winner)) reward = 0.5 # draw if winner == 1: reward = 0.1 if winner == 2: reward = 0.9 for t in targets: reward_targets.append(ls.create_target(t, reward)) i = np.array(states) t = np.array(reward_targets) #print(i) #print(t) model.train_on_batch(i, t) #print(reward_targets)
def train(): (X_train, y_train), (_, _) = fashion_mnist.load_data() X_train = (X_train.astype(np.float32) - 127.5) / 127.5 X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], X_train.shape[2], 1) print(X_train.shape) discriminator = discriminator_model() d_opt = Adam(lr=1e-5, beta_1=0.1) discriminator.compile(loss='binary_crossentropy', optimizer=d_opt) # generator + discriminator (discriminator weights are frozen) discriminator.trainable = False generator = generator_model() dcgan = Sequential([generator, discriminator]) g_opt = Adam(lr=2e-4, beta_1=0.5) dcgan.compile(loss='binary_crossentropy', optimizer=g_opt) num_batches = int(X_train.shape[0] / BATCH_SIZE) print('Number of batches:', num_batches) for epoch in range(NUM_EPOCH): for index in range(num_batches): noise = np.array( [np.random.uniform(-1, 1, 100) for _ in range(BATCH_SIZE)]) image_batch = X_train[index * BATCH_SIZE:(index + 1) * BATCH_SIZE] generated_images = generator.predict(noise, verbose=0) # output generated images if index % 500 == 0: # generate images and shape generated_images_plot = generated_images.astype( 'float32') * 127.5 + 127.5 generated_images_plot = generated_images_plot.reshape( (BATCH_SIZE, 28, 28)) plt.figure(figsize=(8, 4)) plt.suptitle('epoch=%04d,index=%04d' % (epoch, index), fontsize=20) for i in range(BATCH_SIZE): plt.subplot(4, 8, i + 1) plt.imshow(generated_images_plot[i]) plt.gray() # eliminate ticks plt.xticks([]), plt.yticks([]) # save images if not os.path.exists(GENERATED_IMAGE_PATH): os.mkdir(GENERATED_IMAGE_PATH) filename = GENERATED_IMAGE_PATH + "MNIST_%04d_%04d.png" % ( epoch, index) plt.savefig(filename) # update the discriminator X = np.concatenate((image_batch, generated_images)) y = [1] * BATCH_SIZE + [0] * BATCH_SIZE d_loss = discriminator.train_on_batch(X, y) # update the generator noise = np.array( [np.random.uniform(-1, 1, 100) for _ in range(BATCH_SIZE)]) g_loss = dcgan.train_on_batch(noise, [1] * BATCH_SIZE) print("epoch: %d, batch: %d, g_loss: %f, d_loss: %f" % (epoch, index, g_loss, d_loss)) generator.save_weights('generator_fashion_mnist.h5') discriminator.save_weights('discriminator_fashion_mnist.h5')
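# The trainer above maps pixels into [-1, 1] with (x - 127.5) / 127.5 to match
# the generator's tanh output, and inverts with x * 127.5 + 127.5 before
# plotting. A sketch of the round trip:
import numpy as np

pixels = np.array([0., 127.5, 255.], dtype=np.float32)
scaled = (pixels - 127.5) / 127.5     # -> [-1., 0., 1.]
restored = scaled * 127.5 + 127.5     # -> [0., 127.5, 255.]
print(scaled, restored)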
GAN.add(Discriminator) GAN.compile(optimizer = "adam", loss = "binary_crossentropy", metrics = ["accuracy"]) Generator.summary() Discriminator.summary() GAN.summary() for epoch in range(nEpochs): cur_outs = [] GN = GenerateNoise(noise_dim, Batch_size) cur_outs = Generator.predict(GN) cur_outs_np = np.array(cur_outs) cur_dataset = np.concatenate((Sample(dataset, Batch_size), cur_outs_np)) cur_dataset_labels = np.concatenate((np.ones(Batch_size), np.zeros(Batch_size))) Discriminator.trainable = True Discriminator.fit(cur_dataset, cur_dataset_labels, shuffle = True, epochs = 1, batch_size = 1) Discriminator.trainable = False train_noise = GenerateNoise(noise_dim, Batch_size * 2) train_labels = np.ones(Batch_size * 2) GAN.train_on_batch(train_noise, train_labels) if epoch % Check_point == 0: save_img(str(epoch) + ".png", 255 * Generator(np.random.normal(size = (noise_dim, noise_dim, 3)))) #plt.plot() #plt.show() Generator.save_weights(str(nEpochs) + ".h5")
class GAN: def __init__(self): self.latent_space_dim = 100 self.batch_size = 200 self.Generator = Sequential() self.Generator.add( Dense(256, input_dim=self.latent_space_dim, activation="tanh")) self.Generator.add(Dropout(0.25)) self.Generator.add(Dense(512, activation="tanh")) self.Generator.add(Dropout(0.25)) self.Generator.add(Dense(1024, activation="tanh")) self.Generator.add(Dropout(0.25)) self.Generator.add(Dense(28 * 28, activation="tanh")) self.Generator.add(Reshape((28, 28))) self.Discriminator = Sequential() self.Discriminator.add(Flatten(input_shape=(28, 28))) self.Discriminator.add(Dense(1024, activation="tanh")) self.Discriminator.add(Dense(512, activation="tanh")) self.Discriminator.add(Dense(1, activation="sigmoid")) self.Discriminator.compile(loss="binary_crossentropy", optimizer=SGD(0.005), metrics=["accuracy"]) inp_gen_tensor = Input(shape=(self.latent_space_dim, )) out_gen_model = Model(inp_gen_tensor, self.Generator(inp_gen_tensor)) inp_dis_tensor = Input(shape=(28, 28)) out_dis_model = Model(inp_dis_tensor, self.Discriminator(inp_dis_tensor)) out_dis_model.trainable = False inp_tensor = Input(shape=(self.latent_space_dim, )) self.Cascaded_model = Model(inp_tensor, out_dis_model(out_gen_model(inp_tensor))) self.Cascaded_model.compile(loss="binary_crossentropy", optimizer=SGD(0.005)) def train_one_epoch(self): for i in range(3): sampled_x = X_Train[ np.random.randint(0, X_Train.shape[0], self.batch_size), :, :] sampled_Gz = self.Generator.predict( np.random.normal(0, 1, (self.batch_size, self.latent_space_dim))) #all_samples contains both real and generated samples. all_samples = np.zeros((self.batch_size * 2, 28, 28)) all_samples[0:self.batch_size, :, :] = sampled_x all_samples[self.batch_size:self.batch_size * 2, :, :] = sampled_Gz ground_truth = np.zeros((self.batch_size * 2)) ground_truth[0:self.batch_size] = np.ones((self.batch_size)) (discriminator_loss, discriminator_acc) = self.Discriminator.train_on_batch( all_samples, ground_truth) sampled_z = np.random.normal(0, 1, (self.batch_size, self.latent_space_dim)) discriminator_loss_after_train_gen = self.Cascaded_model.train_on_batch( sampled_z, np.ones((self.batch_size))) def train(self, epochs): for epoch in range(epochs): self.train_one_epoch() if epoch % 200 == 0: print "Epoch: " + str(epoch) + " completed" sampled_Gz = self.Generator.predict( np.random.normal(0, 1, (25, self.latent_space_dim))) * 0.5 + 0.5 complete_image = np.ones((28 * 5 + 4, 28 * 5 + 4, 1)) for i in range(0, 5): for j in range(0, 5): complete_image[29 * i:29 * i + 28, 29 * j:29 * j + 28, 0] = sampled_Gz[i * 5 + j, :, :] cv2.imwrite( "images/ " + str(epoch) + ".jpg", cv2.resize(complete_image * 255.0, (0, 0), fx=3, fy=3))
class GAN: def __init__(self, image_dir, activation_function='swish'): self.activation_function = activation_function self.img_rows = 64 self.img_cols = 64 self.channels = 3 self.img_shape = (self.img_rows, self.img_cols, self.channels) self.noise_dim = 100 self.parser = argparse.ArgumentParser() self.parser.add_argument('-t', '--type', type=str, choices=([ 'centre', 'rect', 'random', 'left', 'right', 'top', 'bottom', ]), default='centre') self.args = self.parser.parse_args() self._image_dir = image_dir # to prevent having to load the image filenames every epoch, the list of filenames is retrieved once and then stored optimizer = Adam(lr=0.0002, beta_1=0.5) self.discriminator = self.create_discriminator() self.discriminator.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy']) self.generator = self.create_generator() # For the combined model we will only train the generator self.discriminator.trainable = False self.combined = Sequential() self.combined.add(self.generator) self.combined.add(self.discriminator) self.combined.compile(loss='binary_crossentropy', optimizer=optimizer) self.train_loss_history_discriminator = [] self.train_loss_history_generator = [] def create_generator(self): try: model = load_model("generator.h5") except OSError: input_img = Input( shape=self.img_shape ) # adapt this if using `channels_first` image data format conv1 = Conv2D(16, (3, 3), activation=self.activation_function, padding='same')(input_img) pool1 = MaxPooling2D((2, 2), padding='same')(conv1) conv2 = Conv2D(32, (3, 3), activation=self.activation_function, padding='same')(pool1) pool2 = MaxPooling2D((2, 2), padding='same')(conv2) conv3 = Conv2D(64, (3, 3), activation=self.activation_function, padding='same')(pool2) pool3 = MaxPooling2D((2, 2), padding='same')(conv3) conv4 = Conv2D(64, (3, 3), activation=self.activation_function, padding='same')(pool3) up1 = UpSampling2D((2, 2))(conv4) merge1 = concatenate([conv3, up1]) conv5 = Conv2D(32, (3, 3), activation=self.activation_function, padding='same')(merge1) up2 = UpSampling2D((2, 2))(conv5) merge2 = concatenate([conv2, up2]) conv6 = Conv2D(16, (3, 3), activation=self.activation_function, padding='same')(merge2) up3 = UpSampling2D((2, 2))(conv6) merge3 = concatenate([conv1, up3]) conv7 = Conv2D(3, (3, 3), activation='tanh', padding='same')(merge3) model = Model(input_img, conv7) model.compile(optimizer='adam', loss='mse') return model def create_discriminator(self): try: model = load_model("discriminator.h5") except OSError: model = Sequential() model.add( Conv2D(32, kernel_size=3, strides=2, input_shape=self.img_shape, padding="same", activation=self.activation_function)) model.add(Dropout(0.25)) model.add( Conv2D(64, kernel_size=3, strides=2, padding="same", activation=self.activation_function)) model.add(ZeroPadding2D(padding=((0, 1), (0, 1)))) model.add(BatchNormalization(momentum=0.8)) model.add(Dropout(0.25)) model.add( Conv2D(128, kernel_size=3, strides=2, padding="same", activation=self.activation_function)) model.add(BatchNormalization(momentum=0.8)) model.add(Dropout(0.25)) model.add( Conv2D(256, kernel_size=3, strides=1, padding="same", activation=self.activation_function)) model.add(BatchNormalization(momentum=0.8)) model.add(Dropout(0.25)) model.add(Flatten()) model.add(Dense(1, activation='sigmoid')) model.summary() return model def train(self, epochs, batch_size=128, sample_interval=50, train_until_no_improvement=False, improvement_threshold=0.001): # Adversarial ground truths real = np.ones((batch_size, 1)) fake = 
np.zeros((batch_size, 1)) in_a_row = 0 for epoch in range(epochs): images = get_image_batch(self._image_dir, batch_size) # Get train ims images_holes = images + 0 for index in range(len(images)): images_holes[index, :, :, :] = remove_hole_image( images_holes[index, :, :, :], type=self.args.type) images = images / 127.5 - 1. images_holes = images_holes / 127.5 - 1. self.generator.train_on_batch(images_holes, images) # Generate a batch of new images gen_images = self.generator.predict(images_holes) # Train the discriminator d_loss_real = self.discriminator.train_on_batch(images, real) d_loss_fake = self.discriminator.train_on_batch(gen_images, fake) d_loss = 0.5 * np.add(d_loss_real, d_loss_fake) # target: output from noise vector is always classified as real by discriminator # this trains the generator only, as the discriminator is not trainable g_loss = self.combined.train_on_batch(images_holes, real) print("Epoch:", epoch, "D_loss_r:", d_loss_real[0], "D_loss_f:", d_loss_fake[0], "G_loss:", g_loss) if epoch % sample_interval == 0: images = get_image_batch(self._image_dir, batch_size, val=True) # Get val ims images_holes = images + 0 for index in range(len(images)): images_holes[index, :, :, :] = remove_hole_image( images_holes[index, :, :, :], type=self.args.type) images = images / 127.5 - 1. images_holes = images_holes / 127.5 - 1. decoded_imgs = self.generator.predict(images_holes) remaining_time_estimate = (((time.time() - start_time) / 60) / (epoch + 1)) * ((epochs + 1) - (epoch + 1)) print("Estimated time remaining: {:.4} min".format( remaining_time_estimate) + "| Time elapsed: {:.4} min".format(( (time.time() - start_time) / 60))) os.makedirs(output_dir + "/images/", exist_ok=True) self.train_loss_history_discriminator.append(np.mean(d_loss)) self.train_loss_history_generator.append(g_loss) n = 10 plt.figure(figsize=(20, 4)) for i in range(n): # display original image_idx = random.randint(0, len(decoded_imgs) - 1) ax = plt.subplot(2, n, i + 1) plt.imshow( ((images_holes[image_idx].reshape(64, 64, 3) + 1) * 127.5).astype(np.uint8)) plt.gray() ax.get_xaxis().set_visible(False) ax.get_yaxis().set_visible(False) # display reconstruction ax = plt.subplot(2, n, i + n + 1) plt.imshow( ((decoded_imgs[image_idx].reshape(64, 64, 3) + 1) * 127.5).astype(np.uint8)) plt.gray() ax.get_xaxis().set_visible(False) ax.get_yaxis().set_visible(False) plt.savefig(output_dir + "/images/" + str(epoch) + ".png") plt.close() self.generator.save(output_dir + "generator.h5") self.discriminator.save(output_dir + "discriminator.h5") # Only considers the generator if train_until_no_improvement: if len(self.train_loss_history_generator ) <= sample_interval: # First run through loop last_mean_loss = 9999 current_mean_loss = 999 else: last_mean_loss = current_mean_loss current_mean_loss = np.mean( self.train_loss_history_generator[-sample_interval:] ) # Take last x items from the list if (last_mean_loss - current_mean_loss) < improvement_threshold: in_a_row += 1 print("No improvement in a row: " + str(in_a_row)) if in_a_row >= 10: return # Break out of the function else: in_a_row = 0
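# A compact version of the "stop when the generator stops improving" logic at
# the end of train() above: compare the mean generator loss of the two most
# recent windows and treat a small difference as no improvement. Window size
# and threshold are the caller's choice; this is a sketch, not the class's API.
import numpy as np

def improvement(loss_history, window):
    """Mean loss of the previous window minus the latest one (positive = improving)."""
    prev = np.mean(loss_history[-2 * window:-window])
    curr = np.mean(loss_history[-window:])
    return prev - curr

history = [1.0, 0.9, 0.8, 0.8, 0.79, 0.79]
print(improvement(history, window=2) > 0.001)   # True here: loss is still falling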
import DataBuilder model = Sequential() model.add(Dense(units=10, input_dim=1)) model.add(Activation('tanh')) model.add(Dense(units=1)) model.add(Activation('tanh')) # raise the learning rate of the gradient descent optimizer sgd = SGD(lr=0.3) # sgd: stochastic gradient descent; loss: mean squared error model.compile(optimizer=sgd, loss='mse') # model.compile(optimizer='sgd', loss='mse') x_train, y_train = DataBuilder.getTrainForNoneLine() for step in range(3000): cost = model.train_on_batch(x_train, y_train) if step % 100 == 0: print('cost:', cost) W, b = model.layers[0].get_weights() print("W", W, 'b:', b) y_pred = model.predict(x_train) plt.scatter(x_train, y_train) plt.plot(x_train, y_pred) plt.show()
img_map = {} for ids in image_ids: id_split = ids.split() img_map[id_split[0]] = int(id_split[1]) nlp = load('en') print 'loaded word2vec features...' ## training print 'Training started...' for k in xrange(num_epochs): progbar = generic_utils.Progbar(len(questions_train)) for qu_batch, an_batch, im_batch in zip( batches(questions_train, batch_size, fillvalue=questions_train[-1]), batches(answers_train, batch_size, fillvalue=answers_train[-1]), batches(images_train, batch_size, fillvalue=images_train[-1])): X_q_batch = get_questions_tensor_timeseries(qu_batch, nlp, max_len) X_i_batch = get_images_matrix(im_batch, img_map, VGGfeatures) Y_batch = get_answers_matrix(an_batch, labelencoder) loss = model.train_on_batch([X_q_batch, X_i_batch], Y_batch) #progbar.add(batch_size, values=[("train loss", loss, "accuracy")]) progbar.add(batch_size, values=[("train loss", loss)]) if k % model_save_interval == 0: model.save_weights(model_file_name + '_epoch_' + str(k) + '.h5') model.save_weights(model_file_name + '_epoch_' + str(k) + '.h5')
model.add(Merge([staticmodel, tempmodel], mode='concat')) model.add(Dense(300 + 300, 300)) model.add(Activation('tanh')) print('done model construction') model.compile(loss='mean_squared_error', optimizer='Adadelta') model.load_weights(r'\\ZW5338456\f$\temprepdiction_no_user_.model.lstm.') print('done compile') for i in range(10, 30): print("itr", i) staticFile = open(r"\\ZW5338456\F$\newTempOut1\fea_no_user_.static") tempFile = open(r"\\ZW5338456\F$\newTempOut1\fea_no_user_.Temp") lblFile = open(r"\\ZW5338456\F$\newTempOut1\fea_no_user_.lbl") j = 0 while True: print("batch", j) j = j + 1 staticinput, tempinput, y_train, hasmore = readbatch() history = model.train_on_batch( [staticinput, tempinput], y_train, accuracy=True ) # nb_epoch=10, batch_size=1024, verbose=2, show_accuracy=True) if not hasmore: staticFile.close() tempFile.close() lblFile.close() break model.save_weights(r'\\ZW5338456\f$\temprepdiction_no_user_.model1.lstm.' + str(i)) model.save_weights(r'\\ZW5338456\f$\temprepdiction_no_user_.model1.lstm.')
def train_model(): # Read config json file s = read_JSON('config.json') # Make result folder containing: model file, result.json, output files for validation and test; also save config.json again folder = os.path.basename("Result").split('.')[0] if not os.path.exists(folder): os.mkdir(folder) write_JSON(s,folder + '/model_config.json') # load the dataset train_set, test_set, dic = load_atis(s['dataset']) # Index conversion: vocabulary of meaningful words and labels is covered in the dic data of the ATIS dataset idx2label = dict((k,v) for v,k in dic['labels2idx'].items()) idx2word = dict((k,v) for v,k in dic['words2idx'].items()) # words2idx, tables2idx and labels2idx (use only vocabs and labels; tables list every meaning of a word, which is not necessary) train_words, train_tables, train_labels = train_set test_words, test_tables, test_labels = test_set # Some parameters used in the 'for loop' vocsize = len(dic['words2idx']) nclasses = len(dic['labels2idx']) nsentences = len(train_words) print('Number of vocabulary words from the sentences: ', vocsize) print('Number of slots: ', nclasses) print('Number of sentences used for training: ', nsentences) print('------------------------------------------------') # instantiate the model (seed randomness; batch size affects optimization convergence) np.random.seed(s['batch']) random.seed(s['batch']) # Build the model model = Sequential() #Init model.add(Embedding(vocsize, s['emb_dimension'])) # Word Embedding model.add(SimpleRNN(s['nhidden'], activation='sigmoid', return_sequences=True)) # Recurrent layer with sigmoid activation model.add(TimeDistributed(Dense(output_dim=nclasses))) # Dense layer (context layer) applied at every timestep model.add(Activation("softmax")) # Softmax activation for classification adam = Adam(lr=s['lr'], beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0) # Adam optimizer (some hyperparameters are fixed) #sgd = optimizers.SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True) #SGD + momentum #adagrad = optimizers.Adagrad(lr=0.01, epsilon=1e-08, decay=0.0) #AdaGrad model.compile(loss='categorical_crossentropy', optimizer=adam, metrics=['accuracy']) # Loss function: cross entropy # Train print('------------------------------------------------') print('Training...') for e in range(s['nepochs']): # shuffle shuffle([train_words, train_tables, train_labels], s['batch']) s['ce'] = e for i in range(nsentences): X = np.asarray([train_words[i]]) Y = to_categorical(np.asarray(train_labels[i])[:, np.newaxis],nclasses)[np.newaxis, :, :] if X.shape[1] == 1: continue # bug with X, Y of len 1 model.train_on_batch(X, Y) # Save weights in file 'model_weight.h5' as an HDF5 file (default); can be loaded by: model.load_weights('model_weight.h5', by_name=False) model.save_weights(folder +'/model_weight.h5', overwrite=True) print(str(e + 1),' epochs done!...') # Print completion message print('Finished!...') print('------------------------------------------------')
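# The per-sentence target above has shape (1, sentence_len, nclasses): one
# one-hot row per token plus a leading batch axis. An equivalent shaping with
# plain numpy, using toy label indices:
import numpy as np

nclasses = 5
labels = np.array([2, 0, 4])                     # one slot label per token
Y = np.eye(nclasses)[labels][np.newaxis, :, :]   # same result as the to_categorical call
print(Y.shape)                                   # (1, 3, 5)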
def _runner(layer_class): """ All the recurrent layers share the same interface, so we can run through them with a single function. """ # check return_sequences layer_test(layer_class, kwargs={ 'output_dim': output_dim, 'return_sequences': True }, input_shape=(nb_samples, timesteps, embedding_dim)) # check dynamic behavior layer = layer_class(output_dim, input_dim=embedding_dim) model = Sequential() model.add(layer) model.compile('sgd', 'mse') x = np.random.random((nb_samples, timesteps, embedding_dim)) y = np.random.random((nb_samples, output_dim)) model.train_on_batch(x, y) # check dropout layer_test(layer_class, kwargs={ 'output_dim': output_dim, 'dropout_U': 0.1, 'dropout_W': 0.1 }, input_shape=(nb_samples, timesteps, embedding_dim)) # check implementation modes for mode in ['cpu', 'mem', 'gpu']: layer_test(layer_class, kwargs={ 'output_dim': output_dim, 'consume_less': mode }, input_shape=(nb_samples, timesteps, embedding_dim)) # check statefulness model = Sequential() model.add( embeddings.Embedding(embedding_num, embedding_dim, mask_zero=True, input_length=timesteps, batch_input_shape=(nb_samples, timesteps))) layer = layer_class(output_dim, return_sequences=False, stateful=True, weights=None) model.add(layer) model.compile(optimizer='sgd', loss='mse') out1 = model.predict(np.ones((nb_samples, timesteps))) assert (out1.shape == (nb_samples, output_dim)) # train once so that the states change model.train_on_batch(np.ones((nb_samples, timesteps)), np.ones((nb_samples, output_dim))) out2 = model.predict(np.ones((nb_samples, timesteps))) # if the state is not reset, output should be different assert (out1.max() != out2.max()) # check that output changes after states are reset # (even though the model itself didn't change) layer.reset_states() out3 = model.predict(np.ones((nb_samples, timesteps))) assert (out2.max() != out3.max()) # check that container-level reset_states() works model.reset_states() out4 = model.predict(np.ones((nb_samples, timesteps))) assert_allclose(out3, out4, atol=1e-5) # check that the call to `predict` updated the states out5 = model.predict(np.ones((nb_samples, timesteps))) assert (out4.max() != out5.max()) # Check masking layer.reset_states() left_padded_input = np.ones((nb_samples, timesteps)) left_padded_input[0, :1] = 0 left_padded_input[1, :2] = 0 out6 = model.predict(left_padded_input) layer.reset_states() right_padded_input = np.ones((nb_samples, timesteps)) right_padded_input[0, -1:] = 0 right_padded_input[1, -2:] = 0 out7 = model.predict(right_padded_input) assert_allclose(out7, out6, atol=1e-5) # check regularizers layer = layer_class(output_dim, return_sequences=False, weights=None, batch_input_shape=(nb_samples, timesteps, embedding_dim), W_regularizer=regularizers.WeightRegularizer(l1=0.01), U_regularizer=regularizers.WeightRegularizer(l1=0.01), b_regularizer='l2') shape = (nb_samples, timesteps, embedding_dim) layer.set_input(K.variable(np.ones(shape)), shape=shape) K.eval(layer.output)
batch_counter = 1000 rouge_su4_recall_max = 0 rouge_2_recall_max = 0 while patience < patience_limit: # train on several batchs for i in range(batch_per_epoch): print i triplets, labels = create_triplets(d2v_model, article_names, article_weights, nb_triplets=batch_size, triplets_per_file=16, neg_ratio=1, str_mode=False) fc_model.train_on_batch(triplets, labels) batch_counter += 1 # summarize DUC str_time = time.strftime("%Y_%m_%d") fc_model_name = str_time + "_fc_model_batch_" + str(batch_counter) + "k" system_folder = summary_system_super_folder + fc_model_name + "/" os.mkdir(system_folder) for theme in themes: theme_folder = tdqfs_folder + theme + "/" theme_doc_folder = theme_folder + theme + "/" queries = get_queries(theme_folder + "queries.txt") text = merge_articles_tqdfs(theme_doc_folder) for i in range(len(queries)):
model = Sequential() model.add(LSTM(batch, input_shape=(1, look_back))) model.add(Dense(1)) model.compile(loss='mean_squared_error', optimizer=opt, metrics=['accuracy', mean_pred]) ## training for epoch in range(epochs): print('Epoch {}/{}'.format(epoch + 1, epochs)) itmax = int(trainX.shape[0] / batch) for i in range(itmax): hist = model.train_on_batch(trainX[i * batch:(i + 1) * batch], trainY[i * batch:(i + 1) * batch]) ## short-term prediction trainPredict = model.predict(trainX) testPredict_shortL = [[] for j in range(len(testX) - 1)] testPredict_short = [] for i in range(len(testX) - 1): testPredict_shortL[i] = model.predict(testX[i:i + 1]) testPredict_short.append(testPredict_shortL[i][0][0]) ## long-term prediction def predict_dataset(dataset): setdata = [] #for i in range(len(dataset)): setx = []
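# predict_dataset above is cut off. The usual closed-loop ("long-term")
# forecast feeds each prediction back in as the next input; a sketch of that
# idea under the snippet's (1, 1, look_back) window shape. The helper name and
# the sliding-window details are assumptions, not the original code.
import numpy as np

def predict_recursive(model, seed_window, n_steps):
    """seed_window: array of shape (1, 1, look_back). Returns n_steps predictions."""
    window = seed_window.copy()
    preds = []
    for _ in range(n_steps):
        p = model.predict(window)[0, 0]
        preds.append(p)
        window = np.roll(window, -1, axis=2)   # drop the oldest value
        window[0, 0, -1] = p                   # append the new prediction
    return np.array(preds)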
def test_sequential(in_tmpdir): (x_train, y_train), (x_test, y_test) = _get_test_data() # TODO: factor out def data_generator(x, y, batch_size=50): index_array = np.arange(len(x)) while 1: batches = _make_batches(len(x), batch_size) for batch_index, (batch_start, batch_end) in enumerate(batches): batch_ids = index_array[batch_start:batch_end] x_batch = x[batch_ids] y_batch = y[batch_ids] yield (x_batch, y_batch) model = Sequential() model.add(Dense(num_hidden, input_shape=(input_dim, ))) model.add(Activation('relu')) model.add(Dense(num_classes)) model.add(Activation('softmax')) model.compile(loss='categorical_crossentropy', optimizer='rmsprop') model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, verbose=1, validation_data=(x_test, y_test)) model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, verbose=2, validation_split=0.1) model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, verbose=0) model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, verbose=1, shuffle=False) model.train_on_batch(x_train[:32], y_train[:32]) loss = model.evaluate(x_test, y_test) prediction = model.predict_generator(data_generator(x_test, y_test), 1, max_queue_size=2, verbose=1) gen_loss = model.evaluate_generator(data_generator(x_test, y_test, 50), 1, max_queue_size=2) pred_loss = K.eval( K.mean( losses.get(model.loss)(K.variable(y_test), K.variable(prediction)))) assert (np.isclose(pred_loss, loss)) assert (np.isclose(gen_loss, loss)) model.predict(x_test, verbose=0) model.predict_classes(x_test, verbose=0) model.predict_proba(x_test, verbose=0) fname = 'test_sequential_temp.h5' model.save_weights(fname, overwrite=True) model = Sequential() model.add(Dense(num_hidden, input_shape=(input_dim, ))) model.add(Activation('relu')) model.add(Dense(num_classes)) model.add(Activation('softmax')) model.compile(loss='categorical_crossentropy', optimizer='rmsprop') model.load_weights(fname) os.remove(fname) nloss = model.evaluate(x_test, y_test, verbose=0) assert (loss == nloss) # test serialization config = model.get_config() Sequential.from_config(config) model.summary() json_str = model.to_json() model_from_json(json_str) yaml_str = model.to_yaml() model_from_yaml(yaml_str)
exp_preds = np.exp(preds) preds = exp_preds / np.sum(exp_preds) probas = np.random.multinomial(1, preds, 1) return np.argmax(probas) # tbCallBack = TensorBoard(write_images=True) # tbCallBack.set_model(model) for iteration in range(1, 70000): if iteration % 100 == 0: print() print('-' * 80) print('Iteration', iteration) x, y = data.next_batch(batch_size, len_section) model.train_on_batch(x, y) start_index = random.randint(0, len(data.text) - len_section - 1) if iteration % 1000 == 0: model.save(SAVE_PATH) for diversity in [0.2, 0.5, 1.0, 1.2]: print() print('----- diversity:', diversity) generated = '' sentence = data.text[start_index: start_index + len_section] generated += sentence print('----- Generating with seed: "' + sentence + '"') sys.stdout.write(generated)
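# The generation loop is cut off above. In the standard Keras char-RNN example
# it continues by one-hot encoding the current window, sampling the next
# character at the given diversity, and sliding the window forward. A sketch
# under that assumption; chars, char_indices and indices_char are the usual
# lookup tables and are not defined in this snippet.
for _ in range(400):
    x_pred = np.zeros((1, len_section, len(chars)))
    for t, char in enumerate(sentence):
        x_pred[0, t, char_indices[char]] = 1.
    preds = model.predict(x_pred, verbose=0)[0]
    next_char = indices_char[sample(preds, diversity)]   # sample() as defined above
    generated += next_char
    sentence = sentence[1:] + next_char                  # slide the seed window
    sys.stdout.write(next_char)
    sys.stdout.flush()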