def LSTM_model_memory_batch(self, batch_size, epoch):
    # Note: this snippet references self.look_back / self.trainX / self.trainY,
    # so it is written as a method and takes `self` as its first argument.
    model = Sequential()
    model.add(LSTM(4, batch_input_shape=(batch_size, self.look_back, 1), stateful=True))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer='adam')
    for i in range(epoch):
        model.fit(self.trainX, self.trainY, epochs=1, batch_size=batch_size, verbose=2, shuffle=False)
        model.reset_states()
    return model
def _runner(layer_class):
    """
    All the recurrent layers share the same interface,
    so we can run through them with a single function.
    """
    for ret_seq in [True, False]:
        layer = layer_class(output_dim, return_sequences=ret_seq,
                            weights=None, input_shape=(timesteps, input_dim))
        layer.input = K.variable(np.ones((nb_samples, timesteps, input_dim)))
        layer.get_config()

        for train in [True, False]:
            out = K.eval(layer.get_output(train))
            # Make sure the output has the desired shape
            if ret_seq:
                assert(out.shape == (nb_samples, timesteps, output_dim))
            else:
                assert(out.shape == (nb_samples, output_dim))
            mask = layer.get_output_mask(train)

    # check statefulness
    layer = layer_class(output_dim, return_sequences=False, stateful=True,
                        weights=None,
                        batch_input_shape=(nb_samples, timesteps, input_dim))
    model = Sequential()
    model.add(layer)
    model.compile(optimizer='sgd', loss='mse')
    out1 = model.predict(np.ones((nb_samples, timesteps, input_dim)))
    assert(out1.shape == (nb_samples, output_dim))

    # train once so that the states change
    model.train_on_batch(np.ones((nb_samples, timesteps, input_dim)),
                         np.ones((nb_samples, output_dim)))
    out2 = model.predict(np.ones((nb_samples, timesteps, input_dim)))

    # if the state is not reset, output should be different
    assert(out1.max() != out2.max())

    # check that output changes after states are reset
    # (even though the model itself didn't change)
    layer.reset_states()
    out3 = model.predict(np.ones((nb_samples, timesteps, input_dim)))
    assert(out2.max() != out3.max())

    # check that container-level reset_states() works
    model.reset_states()
    out4 = model.predict(np.ones((nb_samples, timesteps, input_dim)))
    assert_allclose(out3, out4, atol=1e-5)

    # check that the call to `predict` updated the states
    out5 = model.predict(np.ones((nb_samples, timesteps, input_dim)))
    assert(out4.max() != out5.max())
def fit_lstm(train, batch_size, nb_epoch, neurons):
    X, y = train[:, 0:-1], train[:, -1]
    X = X.reshape(X.shape[0], 1, X.shape[1])
    model = Sequential()
    model.add(LSTM(neurons, batch_input_shape=(batch_size, X.shape[1], X.shape[2]), stateful=True))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer='adam')
    for i in range(nb_epoch):
        model.fit(X, y, epochs=1, batch_size=batch_size, verbose=0, shuffle=False)
        model.reset_states()
    return model
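# A minimal usage sketch for fit_lstm() above, under the assumption that each
# row of `train` is laid out as [lag feature(s)..., target]. The synthetic sine
# data, batch size, epoch count, and neuron count are illustrative choices,
# not values from the original code; Keras imports are assumed as above.
import numpy as np

series = np.sin(np.linspace(0, 20, 101)).astype('float32')
train = np.column_stack([series[:-1], series[1:]])  # one lag feature + next value

model = fit_lstm(train, batch_size=1, nb_epoch=10, neurons=4)

# Stateful prediction must reuse the batch size the network was built with.
X = train[:, 0:-1].reshape(train.shape[0], 1, 1)
predictions = model.predict(X, batch_size=1)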
def test_statefulness(layer_class):
    model = Sequential()
    model.add(embeddings.Embedding(embedding_num, embedding_dim,
                                   mask_zero=True,
                                   input_length=timesteps,
                                   batch_input_shape=(nb_samples, timesteps)))
    layer = layer_class(output_dim, return_sequences=False,
                        stateful=True, weights=None)
    model.add(layer)
    model.compile(optimizer='sgd', loss='mse')
    out1 = model.predict(np.ones((nb_samples, timesteps)))
    assert(out1.shape == (nb_samples, output_dim))

    # train once so that the states change
    model.train_on_batch(np.ones((nb_samples, timesteps)),
                         np.ones((nb_samples, output_dim)))
    out2 = model.predict(np.ones((nb_samples, timesteps)))

    # if the state is not reset, output should be different
    assert(out1.max() != out2.max())

    # check that output changes after states are reset
    # (even though the model itself didn't change)
    layer.reset_states()
    out3 = model.predict(np.ones((nb_samples, timesteps)))
    assert(out2.max() != out3.max())

    # check that container-level reset_states() works
    model.reset_states()
    out4 = model.predict(np.ones((nb_samples, timesteps)))
    assert_allclose(out3, out4, atol=1e-5)

    # check that the call to `predict` updated the states
    out5 = model.predict(np.ones((nb_samples, timesteps)))
    assert(out4.max() != out5.max())

    # Check masking
    layer.reset_states()
    left_padded_input = np.ones((nb_samples, timesteps))
    left_padded_input[0, :1] = 0
    left_padded_input[1, :2] = 0
    out6 = model.predict(left_padded_input)

    layer.reset_states()
    right_padded_input = np.ones((nb_samples, timesteps))
    right_padded_input[0, -1:] = 0
    right_padded_input[1, -2:] = 0
    out7 = model.predict(right_padded_input)

    assert_allclose(out7, out6, atol=1e-5)
def hyper_build_model(self, space, predict, custom_batch_size=None):
    conf = self.conf
    model_conf = conf['model']
    rnn_size = model_conf['rnn_size']
    rnn_type = model_conf['rnn_type']
    regularization = model_conf['regularization']

    dropout_prob = model_conf['dropout_prob']
    length = model_conf['length']
    pred_length = model_conf['pred_length']
    skip = model_conf['skip']
    stateful = model_conf['stateful']
    return_sequences = model_conf['return_sequences']
    output_activation = conf['data']['target'].activation  # model_conf['output_activation']
    num_signals = conf['data']['num_signals']

    batch_size = self.conf['training']['batch_size']
    if predict:
        batch_size = self.conf['model']['pred_batch_size']
        # so we can predict with one time point at a time!
        if return_sequences:
            length = pred_length
        else:
            length = 1

    if custom_batch_size is not None:
        batch_size = custom_batch_size

    if rnn_type == 'LSTM':
        rnn_model = LSTM
    elif rnn_type == 'SimpleRNN':
        rnn_model = SimpleRNN
    else:
        print('Unknown model type, exiting.')
        exit(1)

    batch_input_shape = (batch_size, length, num_signals)
    model = Sequential()

    for _ in range(model_conf['rnn_layers']):
        model.add(rnn_model(rnn_size, return_sequences=return_sequences,
                            batch_input_shape=batch_input_shape,
                            stateful=stateful,
                            kernel_regularizer=l2(regularization),
                            recurrent_regularizer=l2(regularization),
                            bias_regularizer=l2(regularization),
                            dropout=dropout_prob,
                            recurrent_dropout=dropout_prob))
        model.add(Dropout(space['Dropout']))

    if return_sequences:
        model.add(TimeDistributed(Dense(1, activation=output_activation)))
    else:
        model.add(Dense(1, activation=output_activation))
    model.reset_states()
    return model
def build_model(predict, batch_size, length, featurelen):
    if predict:
        batch_size = length = 1
    model = Sequential()
    model.add(LSTM(10, return_sequences=True,
                   batch_input_shape=(batch_size, length, featurelen),
                   stateful=True))
    model.add(Dropout(0.2))
    model.add(LSTM(10, return_sequences=True, stateful=True))
    model.add(Dropout(0.2))
    model.add(TimeDistributed(Dense(featurelen)))
    model.add(Activation('softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
    model.reset_states()
    return model
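# A hedged sketch of the two-model pattern build_model() implies: train with a
# wider batch/sequence, then rebuild with batch_size = length = 1 for stepwise
# sampling and copy the weights across. The feature size and shapes below are
# assumptions for illustration only, not values from the original code.
import numpy as np

featurelen = 8
training_model = build_model(predict=False, batch_size=4, length=10, featurelen=featurelen)
# ... fit training_model here, calling training_model.reset_states() between epochs ...

prediction_model = build_model(predict=True, batch_size=4, length=10, featurelen=featurelen)
prediction_model.set_weights(training_model.get_weights())

one_step = np.zeros((1, 1, featurelen), dtype='float32')
one_step[0, 0, 0] = 1.0
probs = prediction_model.predict(one_step, batch_size=1)  # shape (1, 1, featurelen)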
def fit_lstm(train, n_batch, nb_epoch, n_neurons):
    X, y = train[:, 0:-1], train[:, -1]
    X = X.reshape(X.shape[0], 1, X.shape[1])
    print('x=', X)
    model = Sequential()
    # https://keras.io/layers/recurrent/#lstm
    model.add(LSTM(n_neurons, batch_input_shape=(n_batch, X.shape[1], X.shape[2]), stateful=True))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer='adam')
    for i in range(nb_epoch):
        model.fit(X, y, epochs=1, batch_size=n_batch, verbose=0, shuffle=False)
        model.reset_states()
    return model
def fit_lstm(train, n_lag, n_seq, n_batch, nb_epoch, n_neurons):
    # reshape training into [samples, timesteps, features]
    X, y = train[:, 0:n_lag], train[:, n_lag:]
    X = X.reshape(X.shape[0], 1, X.shape[1])
    # design network
    model = Sequential()
    model.add(LSTM(n_neurons, batch_input_shape=(n_batch, X.shape[1], X.shape[2]), stateful=True))
    model.add(Dense(y.shape[1]))
    model.compile(loss='mean_squared_error', optimizer='adam')
    # fit network
    for i in range(nb_epoch):
        model.fit(X, y, epochs=1, batch_size=n_batch, verbose=0, shuffle=False)
        model.reset_states()
    return model
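# A hedged usage sketch for the multi-step variant above: `train` rows are
# assumed to be [n_lag inputs..., n_seq outputs...]. The toy series and
# hyperparameters are illustrative assumptions only.
import numpy as np

n_lag, n_seq = 3, 2
series = np.arange(50, dtype='float32')
rows = [series[i:i + n_lag + n_seq] for i in range(len(series) - n_lag - n_seq + 1)]
train = np.array(rows)

model = fit_lstm(train, n_lag, n_seq, n_batch=1, nb_epoch=5, n_neurons=4)

X = train[:, 0:n_lag].reshape(train.shape[0], 1, n_lag)
forecasts = model.predict(X, batch_size=1)  # shape (samples, n_seq)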
def run(s):
    word_to_num, num_to_word, word_cnt = build_dict(s)
    X, y = build_xy(s, word_to_num, num_to_word, word_cnt)
    print X.shape, y.shape

    dim = len(word_to_num)
    print("Number of words: %d" % dim)
    print("Number of sentences: %d" % len(X))

    model = Sequential()
    model.add(LSTM(output_dim=50, input_dim=dim, activation='sigmoid',
                   inner_activation='hard_sigmoid'))
    # model.add(Dropout(0.5))
    model.add(Dense(200))
    model.add(Activation('sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
    # model.summary()
    model.fit(X, y, batch_size=200, nb_epoch=10)

    N = 200  # num of sents to be generated
    for i in range(N):
        model.reset_states()
        k = start_token
        res = [k]
        while k != end_token:
            k_oh = num_to_onehot(k, dim)
            kk_oh = model.predict(k_oh)
            kk = onehot_to_num(kk_oh, dim)
            k = kk
            res.append(k)
        s = ""
        for c in res:
            s = s + num_to_word(c)
        print s
class LSTM_RNN:

    def __init__(self, look_back, dropout_probability=0.2,
                 init='he_uniform', loss='mse', optimizer='rmsprop'):
        self.rnn = Sequential()
        self.look_back = look_back
        self.rnn.add(LSTM(10, stateful=True, batch_input_shape=(1, 1, 1), init=init))
        self.rnn.add(Dropout(dropout_probability))
        self.rnn.add(Dense(1, init=init))
        self.rnn.compile(loss=loss, optimizer=optimizer)

    def batch_train_test(self, trainX, trainY, testX, testY, nb_epoch=150):
        print('Training LSTM-RNN...')
        for epoch in range(nb_epoch):
            print('Epoch ' + str(epoch + 1) + '/{}'.format(nb_epoch))
            training_losses = []
            testing_losses = []

            for i in range(len(trainX)):
                y_actual = trainY[i]
                for j in range(self.look_back):
                    training_loss = self.rnn.train_on_batch(
                        np.expand_dims(np.expand_dims(trainX[i][j], axis=1), axis=1),
                        np.array([y_actual]))
                    training_losses.append(training_loss)
                self.rnn.reset_states()
            print('Mean training loss = {}'.format(np.mean(training_losses)))

            mean_testing_loss = []
            for i in range(len(testX)):
                for j in range(self.look_back):
                    testing_loss = self.rnn.test_on_batch(
                        np.expand_dims(np.expand_dims(testX[i][j], axis=1), axis=1),
                        np.array([testY[i]]))
                    testing_losses.append(testing_loss)
                self.rnn.reset_states()

                for j in range(self.look_back):
                    y_pred = self.rnn.predict_on_batch(
                        np.expand_dims(np.expand_dims(testX[i][j], axis=1), axis=1))
                self.rnn.reset_states()

            mean_testing_loss = np.mean(testing_losses)
            print('Mean testing loss = {}'.format(mean_testing_loss))
        return mean_testing_loss
def test_convolutional_recurrent():
    num_row = 3
    num_col = 3
    filters = 5
    num_samples = 2
    input_channel = 2
    input_num_row = 5
    input_num_col = 5
    sequence_len = 2

    for data_format in ['channels_first', 'channels_last']:

        if data_format == 'channels_first':
            inputs = np.random.rand(num_samples, sequence_len,
                                    input_channel,
                                    input_num_row, input_num_col)
        else:
            inputs = np.random.rand(num_samples, sequence_len,
                                    input_num_row, input_num_col,
                                    input_channel)

        for return_sequences in [True, False]:
            # test for output shape:
            output = layer_test(convolutional_recurrent.ConvLSTM2D,
                                kwargs={'data_format': data_format,
                                        'return_sequences': return_sequences,
                                        'filters': filters,
                                        'kernel_size': (num_row, num_col),
                                        'padding': 'valid'},
                                input_shape=inputs.shape)

            # No need to check following tests for both data formats
            if data_format == 'channels_first' or return_sequences:
                continue

            # Tests for statefulness
            model = Sequential()
            kwargs = {'data_format': data_format,
                      'return_sequences': return_sequences,
                      'filters': filters,
                      'kernel_size': (num_row, num_col),
                      'stateful': True,
                      'batch_input_shape': inputs.shape,
                      'padding': 'same'}
            layer = convolutional_recurrent.ConvLSTM2D(**kwargs)

            model.add(layer)
            model.compile(optimizer='sgd', loss='mse')
            out1 = model.predict(np.ones_like(inputs))

            # train once so that the states change
            model.train_on_batch(np.ones_like(inputs),
                                 np.random.random(out1.shape))
            out2 = model.predict(np.ones_like(inputs))

            # if the state is not reset, output should be different
            assert(out1.max() != out2.max())

            # check that output changes after states are reset
            # (even though the model itself didn't change)
            layer.reset_states()
            out3 = model.predict(np.ones_like(inputs))
            assert(out2.max() != out3.max())

            # check that container-level reset_states() works
            model.reset_states()
            out4 = model.predict(np.ones_like(inputs))
            assert_allclose(out3, out4, atol=1e-5)

            # check that the call to `predict` updated the states
            out5 = model.predict(np.ones_like(inputs))
            assert(out4.max() != out5.max())

            # check regularizers
            kwargs = {'data_format': data_format,
                      'return_sequences': return_sequences,
                      'kernel_size': (num_row, num_col),
                      'stateful': True,
                      'filters': filters,
                      'batch_input_shape': inputs.shape,
                      'kernel_regularizer': regularizers.L1L2(l1=0.01),
                      'recurrent_regularizer': regularizers.L1L2(l1=0.01),
                      'bias_regularizer': 'l2',
                      'activity_regularizer': 'l2',
                      'kernel_constraint': 'max_norm',
                      'recurrent_constraint': 'max_norm',
                      'bias_constraint': 'max_norm',
                      'padding': 'same'}
            layer = convolutional_recurrent.ConvLSTM2D(**kwargs)
            layer.build(inputs.shape)
            assert len(layer.losses) == 3
            assert layer.activity_regularizer
            output = layer(K.variable(np.ones(inputs.shape)))
            assert len(layer.losses) == 4
            K.eval(output)

            # check dropout
            layer_test(convolutional_recurrent.ConvLSTM2D,
                       kwargs={'data_format': data_format,
                               'return_sequences': return_sequences,
                               'filters': filters,
                               'kernel_size': (num_row, num_col),
                               'padding': 'same',
                               'dropout': 0.1,
                               'recurrent_dropout': 0.1},
                       input_shape=inputs.shape)

            # check state initialization
            layer = convolutional_recurrent.ConvLSTM2D(filters=filters,
                                                       kernel_size=(num_row, num_col),
                                                       data_format=data_format,
                                                       return_sequences=return_sequences)
            layer.build(inputs.shape)
            x = Input(batch_shape=inputs.shape)
            initial_state = layer.get_initial_state(x)
            y = layer(x, initial_state=initial_state)
            model = Model(x, y)
            assert model.predict(inputs).shape == layer.compute_output_shape(inputs.shape)
def test_recurrent_convolutional():
    nb_row = 3
    nb_col = 3
    nb_filter = 5
    nb_samples = 2
    input_channel = 2
    input_nb_row = 5
    input_nb_col = 5
    sequence_len = 2

    for dim_ordering in ['th', 'tf']:

        if dim_ordering == 'th':
            input = np.random.rand(nb_samples, sequence_len,
                                   input_channel,
                                   input_nb_row, input_nb_col)
        else:  # tf
            input = np.random.rand(nb_samples, sequence_len,
                                   input_nb_row, input_nb_col,
                                   input_channel)

        for return_sequences in [True, False]:
            # test for output shape:
            output = layer_test(convolutional_recurrent.ConvLSTM2D,
                                kwargs={'dim_ordering': dim_ordering,
                                        'return_sequences': return_sequences,
                                        'nb_filter': nb_filter,
                                        'nb_row': nb_row,
                                        'nb_col': nb_col,
                                        'border_mode': "same"},
                                input_shape=input.shape)

            output_shape = [nb_samples, input_nb_row, input_nb_col]
            if dim_ordering == 'th':
                output_shape.insert(1, nb_filter)
            else:
                output_shape.insert(3, nb_filter)
            if return_sequences:
                output_shape.insert(1, sequence_len)
            assert output.shape == tuple(output_shape)

            # No need to check statefulness for both
            if dim_ordering == 'th' or return_sequences:
                continue

            # Tests for statefulness
            model = Sequential()
            kwargs = {'dim_ordering': dim_ordering,
                      'return_sequences': return_sequences,
                      'nb_filter': nb_filter,
                      'nb_row': nb_row,
                      'nb_col': nb_col,
                      'stateful': True,
                      'batch_input_shape': input.shape,
                      'border_mode': "same"}
            layer = convolutional_recurrent.ConvLSTM2D(**kwargs)

            model.add(layer)
            model.compile(optimizer='sgd', loss='mse')
            out1 = model.predict(np.ones_like(input))
            assert(out1.shape == tuple(output_shape))

            # train once so that the states change
            model.train_on_batch(np.ones_like(input), np.ones_like(output))
            out2 = model.predict(np.ones_like(input))

            # if the state is not reset, output should be different
            assert(out1.max() != out2.max())

            # check that output changes after states are reset
            # (even though the model itself didn't change)
            layer.reset_states()
            out3 = model.predict(np.ones_like(input))
            assert(out2.max() != out3.max())

            # check that container-level reset_states() works
            model.reset_states()
            out4 = model.predict(np.ones_like(input))
            assert_allclose(out3, out4, atol=1e-5)

            # check that the call to `predict` updated the states
            out5 = model.predict(np.ones_like(input))
            assert(out4.max() != out5.max())

            # check regularizers
            kwargs = {'dim_ordering': dim_ordering,
                      'return_sequences': return_sequences,
                      'nb_filter': nb_filter,
                      'nb_row': nb_row,
                      'nb_col': nb_col,
                      'stateful': True,
                      'batch_input_shape': input.shape,
                      'W_regularizer': regularizers.WeightRegularizer(l1=0.01),
                      'U_regularizer': regularizers.WeightRegularizer(l1=0.01),
                      'b_regularizer': 'l2',
                      'border_mode': "same"}
            layer = convolutional_recurrent.ConvLSTM2D(**kwargs)
            layer.set_input(K.variable(np.ones(input.shape)), shape=input.shape)
            K.eval(layer.output)

            # check dropout
            layer_test(convolutional_recurrent.ConvLSTM2D,
                       kwargs={'dim_ordering': dim_ordering,
                               'return_sequences': return_sequences,
                               'nb_filter': nb_filter,
                               'nb_row': nb_row,
                               'nb_col': nb_col,
                               'border_mode': "same",
                               'dropout_W': 0.1,
                               'dropout_U': 0.1},
                       input_shape=input.shape)
def build_model(self, predict, custom_batch_size=None):
    conf = self.conf
    model_conf = conf['model']
    rnn_size = model_conf['rnn_size']
    rnn_type = model_conf['rnn_type']
    optimizer = model_conf['optimizer']
    lr = model_conf['lr']
    clipnorm = model_conf['clipnorm']
    regularization = model_conf['regularization']

    if optimizer == 'sgd':
        optimizer_class = SGD
    elif optimizer == 'adam':
        optimizer_class = Adam
    elif optimizer == 'rmsprop':
        optimizer_class = RMSprop
    elif optimizer == 'nadam':
        optimizer_class = Nadam
    else:
        optimizer = optimizer

    if lr is not None or clipnorm is not None:
        optimizer = optimizer_class(lr=lr, clipnorm=clipnorm)

    loss_fn = conf['data']['target'].loss  # model_conf['loss']
    dropout_prob = model_conf['dropout_prob']
    length = model_conf['length']
    pred_length = model_conf['pred_length']
    skip = model_conf['skip']
    stateful = model_conf['stateful']
    return_sequences = model_conf['return_sequences']
    output_activation = conf['data']['target'].activation  # model_conf['output_activation']
    num_signals = conf['data']['num_signals']

    batch_size = self.conf['training']['batch_size']
    if predict:
        batch_size = self.conf['model']['pred_batch_size']
        # so we can predict with one time point at a time!
        if return_sequences:
            length = pred_length
        else:
            length = 1

    if custom_batch_size is not None:
        batch_size = custom_batch_size

    if rnn_type == 'LSTM':
        rnn_model = LSTM
    elif rnn_type == 'SimpleRNN':
        rnn_model = SimpleRNN
    else:
        print('Unknown model type, exiting.')
        exit(1)

    batch_input_shape = (batch_size, length, num_signals)
    model = Sequential()
    # model.add(TimeDistributed(Dense(num_signals, bias=True), batch_input_shape=batch_input_shape))

    for _ in range(model_conf['rnn_layers']):
        model.add(rnn_model(rnn_size, return_sequences=return_sequences,
                            batch_input_shape=batch_input_shape,
                            stateful=stateful,
                            W_regularizer=l2(regularization),
                            U_regularizer=l2(regularization),
                            b_regularizer=l2(regularization),
                            dropout_W=dropout_prob,
                            dropout_U=dropout_prob))
        model.add(Dropout(dropout_prob))

    if return_sequences:
        model.add(TimeDistributed(Dense(1, activation=output_activation)))
    else:
        model.add(Dense(1, activation=output_activation))
    model.compile(loss=loss_fn, optimizer=optimizer)
    model.reset_states()
    # model.compile(loss='mean_squared_error', optimizer='sgd')  # for numerical output
    return model
def _runner(layer_class):
    """
    All the recurrent layers share the same interface,
    so we can run through them with a single function.
    """
    for ret_seq in [True, False]:
        layer = layer_class(output_dim, return_sequences=ret_seq,
                            weights=None, input_shape=(timesteps, embedding_dim))
        layer.input = K.variable(np.ones((nb_samples, timesteps, embedding_dim)))
        layer.get_config()

        for train in [True, False]:
            out = K.eval(layer.get_output(train))
            # Make sure the output has the desired shape
            if ret_seq:
                assert out.shape == (nb_samples, timesteps, output_dim)
            else:
                assert out.shape == (nb_samples, output_dim)
            mask = layer.get_output_mask(train)

    # check statefulness
    model = Sequential()
    model.add(
        embeddings.Embedding(
            embedding_num,
            embedding_dim,
            mask_zero=True,
            input_length=timesteps,
            batch_input_shape=(nb_samples, timesteps),
        )
    )
    layer = layer_class(output_dim, return_sequences=False,
                        stateful=True, weights=None)
    model.add(layer)
    model.compile(optimizer="sgd", loss="mse")
    out1 = model.predict(np.ones((nb_samples, timesteps)))
    assert out1.shape == (nb_samples, output_dim)

    # train once so that the states change
    model.train_on_batch(np.ones((nb_samples, timesteps)),
                         np.ones((nb_samples, output_dim)))
    out2 = model.predict(np.ones((nb_samples, timesteps)))

    # if the state is not reset, output should be different
    assert out1.max() != out2.max()

    # check that output changes after states are reset
    # (even though the model itself didn't change)
    layer.reset_states()
    out3 = model.predict(np.ones((nb_samples, timesteps)))
    assert out2.max() != out3.max()

    # check that container-level reset_states() works
    model.reset_states()
    out4 = model.predict(np.ones((nb_samples, timesteps)))
    assert_allclose(out3, out4, atol=1e-5)

    # check that the call to `predict` updated the states
    out5 = model.predict(np.ones((nb_samples, timesteps)))
    assert out4.max() != out5.max()

    # Check masking
    layer.reset_states()

    left_padded_input = np.ones((nb_samples, timesteps))
    left_padded_input[0, :1] = 0
    left_padded_input[1, :2] = 0
    left_padded_input[2, :3] = 0
    out6 = model.predict(left_padded_input)

    layer.reset_states()

    right_padded_input = np.ones((nb_samples, timesteps))
    right_padded_input[0, -1:] = 0
    right_padded_input[1, -2:] = 0
    right_padded_input[2, -3:] = 0
    out7 = model.predict(right_padded_input)

    assert_allclose(out7, out6, atol=1e-5)
def lstm_model(
        data, hidden_layer_neurons, epochs, batch_size=1,
        feature_dimensions=1, verbose=False):
    """Build an LSTM model.

    Args:
        data: Data frame of X, Y values
        hidden_layer_neurons: Number of neurons per layer
        epochs: Number of iterations for learning
        batch_size: Training batch size
        feature_dimensions: Dimension of features (number of rows per feature)
        verbose: Verbosity of model.fit

    Returns:
        model: Graph of LSTM model

    """
    # Initialize key variables
    start = time.time()

    # Process the data for fitting
    x_values, y_values = data[:, 0:-1], data[:, -1]
    x_shaped = x_values.reshape(x_values.shape[0], 1, x_values.shape[1])

    # Let's do some learning!
    model = Sequential()

    '''
    The Long Short-Term Memory network (LSTM) is a type of Recurrent Neural
    Network (RNN). A benefit of this type of network is that it can learn and
    remember over long sequences and does not rely on a pre-specified window
    of lagged observations as input.

    In Keras, this is referred to as being "stateful", and involves setting
    the "stateful" argument to "True" when defining an LSTM layer.

    By default, an LSTM layer in Keras maintains state between the data
    within one batch. A batch of data is a fixed-sized number of rows from
    the training dataset that defines how many patterns (sequences) to
    process before updating the weights of the network.

    A state is: Where am I now inside a sequence? Which time step is it?
    How has this particular sequence been behaving since its beginning up
    to now?

    A weight is: What do I know about the general behavior of all sequences
    I've seen so far?

    State in the LSTM layer between batches is cleared by default. This is
    undesirable, so we make the LSTM stateful. This gives us fine-grained
    control over when the state of the LSTM layer is cleared, by calling
    reset_states() ourselves around the model.fit() calls.

    LSTM networks can be stacked in Keras in the same way that other layer
    types can be stacked. One required addition to the configuration is that
    every LSTM layer that feeds another LSTM layer must return the full
    sequence. This is done by setting the return_sequences parameter on the
    layer to True.

    batch_size denotes the subset size of your training sample (e.g. 100 out
    of 1000) which is going to be used to train the network during its
    learning process. Each batch trains the network in succession, taking
    into account the weights updated by the previous batch.

    return_sequences indicates whether a recurrent layer should return its
    entire output sequence (i.e. a sequence of vectors of a specific
    dimension) to the next layer of the network, or only its last output,
    which is a single vector of the same dimension.

    batch_input_shape declares that the network accepts input data of only
    the defined batch size, preventing the creation of variable-sized
    batches. It is widely used in stacked LSTM networks. It is a tuple of
    (batch_size, timesteps, data_dimension).
    '''
    timesteps = x_shaped.shape[1]
    data_dimension = x_shaped.shape[2]

    # Add layers to the model
    model.add(
        LSTM(
            units=hidden_layer_neurons,
            batch_input_shape=(batch_size, timesteps, data_dimension),
            return_sequences=True,
            stateful=True
        )
    )
    model.add(Dropout(0.2))

    model.add(
        LSTM(
            units=hidden_layer_neurons,
            batch_input_shape=(batch_size, timesteps, data_dimension),
            return_sequences=False,
            stateful=True
        )
    )
    model.add(Dropout(0.2))

    model.add(
        Dense(
            units=feature_dimensions
        )
    )
    # model.add(Activation('linear'))

    '''
    Once the network is specified, it must be compiled into an efficient
    symbolic representation using a backend mathematical library such as
    TensorFlow.

    In compiling the network, we must specify a loss function and an
    optimization algorithm. We use "mean_squared_error" ("mse") as the loss
    function, since it closely matches the RMSE we are interested in, along
    with the efficient ADAM optimization algorithm.
    '''
    model.compile(loss='mse', optimizer='adam', metrics=['accuracy'])

    '''
    Once the model is compiled, the network can be fit to the training data.
    Because the network is stateful, we must control when the internal state
    is reset. Therefore, we manually manage the training process one epoch
    at a time across the desired number of epochs.

    By default, the samples within an epoch are shuffled before being
    exposed to the network. Again, this is undesirable for the LSTM because
    we want the network to build up state as it learns across the sequence
    of observations. We disable shuffling by setting "shuffle" to "False".
    '''
    for _ in range(epochs):
        model.fit(
            x_shaped,
            y_values,
            batch_size=batch_size,
            shuffle=False,
            epochs=1,
            verbose=verbose,
            validation_split=0.05)

        '''
        When the fit process reaches the end of the samples,
        model.reset_states() is called to reset the internal state at the
        end of the training epoch, ready for the next training iteration.
        The next iteration starts training from the beginning of the
        dataset, so the state must be reset because the previous state is
        only relevant to the prior epoch.
        '''
        model.reset_states()

    print('\n> Training Time: {:20.2f}'.format(time.time() - start))
    return model
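# A hedged usage sketch for lstm_model(): the synthetic supervised array below
# (previous value -> next value) and the hyperparameters are illustrative
# assumptions, not part of the original code.
import numpy as np

series = np.sin(np.linspace(0, 12, 201)).astype('float32')
data = np.column_stack([series[:-1], series[1:]])  # rows of [X, Y]

model = lstm_model(data, hidden_layer_neurons=32, epochs=5, batch_size=1)

# Stateful inference has to reuse the training batch size (1 here).
x_new = data[:, 0:-1].reshape(-1, 1, 1)
y_hat = model.predict(x_new, batch_size=1)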
def main():
    parser = argparse.ArgumentParser(description='Process some integers.')
    parser.add_argument('--dataset', type=str, help='Dataset file', required=True)
    parser.add_argument('--splitfile', type=str, help='Split file', required=True)
    parser.add_argument('--hiddenunits', type=int,
                        help='Number of LSTM hidden units.', default=200, required=False)
    parser.add_argument('--batchsize', type=int,
                        help='Number of sequences to process in a batch.', default=5, required=False)
    parser.add_argument('--timewindow', type=int,
                        help='Number of timesteps to process in a batch.', default=100, required=False)
    parser.add_argument('--epochs', type=int,
                        help='Number of epochs.', default=50, required=False)
    args = parser.parse_args()

    dataset = args.dataset
    split_file = args.splitfile
    hidden_units = args.hiddenunits
    batch_size = args.batchsize
    time_window = args.timewindow
    epochs = args.epochs

    model_file = dataset + '.model_weights'
    history_file = dataset + '.history'
    preds_file = dataset + '.preds'

    overall_loss = [0.0]
    preds = []
    history = []

    # load dataset
    training_seqs, testing_seqs, num_skills = load_dataset(dataset, split_file)

    print "Training Sequences: %d" % len(training_seqs)
    print "Testing Sequences: %d" % len(testing_seqs)
    print "Number of skills: %d" % num_skills

    # Our loss function
    # The model gives predictions for all skills so we need to get the
    # prediction for the skill at time t. We do that by taking the column-wise
    # dot product between the predictions at each time slice and a
    # one-hot encoding of the skill at time t.
    # y_true: (nsamples x nsteps x nskills+1)
    # y_pred: (nsamples x nsteps x nskills)
    def loss_function(y_true, y_pred):
        skill = y_true[:, :, 0:num_skills]
        obs = y_true[:, :, num_skills]
        rel_pred = Th.sum(y_pred * skill, axis=2)

        # keras implementation does a mean on the last dimension (axis=-1)
        # which it assumes is a singleton dimension. But in our context that
        # would be wrong.
        return K.binary_crossentropy(rel_pred, obs)

    # build model
    model = Sequential()

    # ignore padding
    model.add(Masking(-1.0, batch_input_shape=(batch_size, time_window, num_skills * 2)))

    # lstm configured to keep states between batches
    model.add(LSTM(input_dim=num_skills * 2,
                   output_dim=hidden_units,
                   return_sequences=True,
                   batch_input_shape=(batch_size, time_window, num_skills * 2),
                   stateful=True))

    # readout layer. TimeDistributedDense uses the same weights for all
    # time steps.
    model.add(TimeDistributedDense(input_dim=hidden_units,
                                   output_dim=num_skills,
                                   activation='sigmoid'))

    # optimize with rmsprop which dynamically adapts the learning
    # rate of each weight.
    model.compile(loss=loss_function, optimizer='rmsprop', class_mode="binary")

    # training function
    def trainer(X, Y):
        overall_loss[0] += model.train_on_batch(X, Y)[0]

    # prediction
    def predictor(X, Y):
        batch_activations = model.predict_on_batch(X)
        skill = Y[:, :, 0:num_skills]
        obs = Y[:, :, num_skills]
        y_pred = np.squeeze(np.array(batch_activations))

        rel_pred = np.sum(y_pred * skill, axis=2)

        for b in xrange(0, X.shape[0]):
            for t in xrange(0, X.shape[1]):
                if X[b, t, 0] == -1.0:
                    continue
                preds.append((rel_pred[b][t], obs[b][t]))

    # called when a prediction batch is finished;
    # resets LSTM state because we are done with all sequences in the batch
    def finished_prediction_batch(percent_done):
        model.reset_states()

    # similar to the above
    def finished_batch(percent_done):
        print "(%4.3f %%) %f" % (percent_done, overall_loss[0])
        model.reset_states()

    # run the model
    for e in xrange(0, epochs):
        model.reset_states()

        # train
        run_func(training_seqs, num_skills, trainer, batch_size, time_window, finished_batch)

        model.reset_states()

        # test
        run_func(testing_seqs, num_skills, predictor, batch_size, time_window, finished_prediction_batch)

        # compute AUC
        auc = roc_auc_score([p[1] for p in preds], [p[0] for p in preds])

        # log
        history.append((overall_loss[0], auc))

        # save model
        model.save_weights(model_file, overwrite=True)

        print "==== Epoch: %d, Test AUC: %f" % (e, auc)

        # reset loss
        overall_loss[0] = 0.0

        # save predictions
        with open(preds_file, 'w') as f:
            f.write('was_heldout\tprob_recall\tstudent_recalled\n')
            for pred in preds:
                f.write('1\t%f\t%d\n' % (pred[0], pred[1]))

        with open(history_file, 'w') as f:
            for h in history:
                f.write('\t'.join([str(he) for he in h]))
                f.write('\n')

        # clear preds
        preds = []
# Save weights
name = 'neumonia_dataset_interson_keras_alldata_{0}_weights_cnn_{0}.h5'.format(number_db, (number_db))
print(name)
model.save_weights(name, overwrite=True)

# l = h5py.File("loss_history_{0}.hdf5".format(number_db), "w")
# dset = f.create_dataset("loss_history_{0}".format(number_db), (100,), dtype='i')

# A way to reload a model with weights into the same architecture
'''
json_string = model.to_json()
open('my_model_architecture.json', 'w').write(json_string)
model.save_weights('my_model_weights.h5')
'''

import cPickle

f = open('cnn_loss_{0}.pkl'.format(number_db), 'wb')
cPickle.dump(history.losses, f, protocol=cPickle.HIGHEST_PROTOCOL)
f.close()

h = open('cnn_metrics_{0}.pkl'.format(number_db), 'wb')
p = [sensitivity, specificity, F1, mcc]
cPickle.dump(p, h, protocol=cPickle.HIGHEST_PROTOCOL)
h.close()

model.reset_states()

# import matplotlib.pylab as plt
# plt.plot(history.losses, 'bo')
# plt.xlabel('Iteration')
# plt.ylabel('Binary Cross Entropy')
# plt.show()
def generate_lstm_gmm(seq, maxlen=1, bs=500, ep=2, output_iterations=10, num_mixture_components=3):
    # seq is a single sample, in the format (timesteps, features)!
    # TODO: expand code to support multiple samples, fed into the model together as a batch

    # Cut the timeseries data (variable name 'seq') into semi-redundant sequence chunks of maxlen
    X = []
    y = []
    for i in range(0, len(seq) - maxlen):
        X.append(seq[i:i + maxlen])
        y.append(seq[i + maxlen])

    dim = len((X[0][0]))

    print("sequence chunks:", len(X))
    print("chunk width:", len(X[0]))
    print("vector dimension:", dim)
    print("number of mixture components:", num_mixture_components)
    print("batch size:", bs)

    X = np.array(X)
    y = np.array(y)

    # build the model: an LSTM followed by a dense layer producing GMM parameters
    print('Build model...')
    model = Sequential()
    model.reset_states()
    model.add(LSTM((dim + 2) * num_mixture_components, return_sequences=False, input_shape=(maxlen, dim)))
    model.add(Dense((dim + 2) * num_mixture_components))
    model.add(GMMActivation(num_mixture_components))
    model.compile(loss=gmm_loss, optimizer=RMSprop(lr=0.001))

    # Train the model
    model.fit(X, y, batch_size=bs, nb_epoch=ep)

    # Generate timeseries
    x_seed = X[len(X) - 1]  # choose the final in-sample data point to initialize the model
    x_array = []
    x_array.append(x_seed)
    x = np.array(x_array)

    predicted = []
    for i in range(output_iterations):
        pred_parameters = model.predict_on_batch(x)[0]
        means = pred_parameters[:num_mixture_components * dim]
        sds = pred_parameters[(num_mixture_components * dim):(num_mixture_components * (dim + 1))]
        weights = pred_parameters[(num_mixture_components * (dim + 1)):]

        print(means)
        print(sds)
        print(weights)

        means = means.reshape(num_mixture_components, dim)
        sds = sds[:, np.newaxis]
        weights = weights[:, np.newaxis]

        pred = weights * np.random.normal(means, sds)
        pred = np.sum(pred, axis=0)
        predicted.append(pred)

    return predicted
def _runner(layer_class):
    """
    All the recurrent layers share the same interface,
    so we can run through them with a single function.
    """
    # check return_sequences
    layer_test(layer_class,
               kwargs={'output_dim': output_dim,
                       'return_sequences': True},
               input_shape=(3, 2, 3))

    # check dropout
    layer_test(layer_class,
               kwargs={'output_dim': output_dim,
                       'dropout_U': 0.1,
                       'dropout_W': 0.1},
               input_shape=(3, 2, 3))

    # check implementation modes
    for mode in ['cpu', 'mem', 'gpu']:
        layer_test(layer_class,
                   kwargs={'output_dim': output_dim,
                           'consume_less': mode},
                   input_shape=(3, 2, 3))

    # check statefulness
    model = Sequential()
    model.add(embeddings.Embedding(embedding_num, embedding_dim,
                                   mask_zero=True,
                                   input_length=timesteps,
                                   batch_input_shape=(nb_samples, timesteps)))
    layer = layer_class(output_dim, return_sequences=False,
                        stateful=True, weights=None)
    model.add(layer)
    model.compile(optimizer='sgd', loss='mse')
    out1 = model.predict(np.ones((nb_samples, timesteps)))
    assert(out1.shape == (nb_samples, output_dim))

    # train once so that the states change
    model.train_on_batch(np.ones((nb_samples, timesteps)),
                         np.ones((nb_samples, output_dim)))
    out2 = model.predict(np.ones((nb_samples, timesteps)))

    # if the state is not reset, output should be different
    assert(out1.max() != out2.max())

    # check that output changes after states are reset
    # (even though the model itself didn't change)
    layer.reset_states()
    out3 = model.predict(np.ones((nb_samples, timesteps)))
    assert(out2.max() != out3.max())

    # check that container-level reset_states() works
    model.reset_states()
    out4 = model.predict(np.ones((nb_samples, timesteps)))
    assert_allclose(out3, out4, atol=1e-5)

    # check that the call to `predict` updated the states
    out5 = model.predict(np.ones((nb_samples, timesteps)))
    assert(out4.max() != out5.max())

    # Check masking
    layer.reset_states()

    left_padded_input = np.ones((nb_samples, timesteps))
    left_padded_input[0, :1] = 0
    left_padded_input[1, :2] = 0
    left_padded_input[2, :3] = 0
    out6 = model.predict(left_padded_input)

    layer.reset_states()

    right_padded_input = np.ones((nb_samples, timesteps))
    right_padded_input[0, -1:] = 0
    right_padded_input[1, -2:] = 0
    right_padded_input[2, -3:] = 0
    out7 = model.predict(right_padded_input)

    assert_allclose(out7, out6, atol=1e-5)

    # check regularizers
    layer = layer_class(output_dim, return_sequences=False, weights=None,
                        batch_input_shape=(nb_samples, timesteps, embedding_dim),
                        W_regularizer=regularizers.WeightRegularizer(l1=0.01),
                        U_regularizer=regularizers.WeightRegularizer(l1=0.01),
                        b_regularizer='l2')
    shape = (nb_samples, timesteps, embedding_dim)
    layer.set_input(K.variable(np.ones(shape)), shape=shape)
    K.eval(layer.output)
def generate_audio(X, Y, seed_X):
    """
    X: array of input sequences
    Y: next value for each input sequence
    seed_X: a single sequence to use as the seed for generation
    """
    # reshape to the input format needed for the NN
    X = np.reshape(X, (X.shape[0], X.shape[1], 1))
    seed_X = np.reshape(seed_X, (seed_X.shape[0], seed_X.shape[1], 1))

    # train a new model or use a pre-trained model?
    USE_SAVED_MODEL = False

    model_arch_file = 'model_architecture.json'
    model_weight_file = 'model_weights.h5'
    print "Architecture file:", model_arch_file
    print "Weight file:", model_weight_file

    model = None
    if USE_SAVED_MODEL:
        print "Loading model ..."
        model = model_from_json(open(model_arch_file).read())
        model.load_weights(model_weight_file)
    else:
        model = Sequential()
        layers = [1, 10, 20, 1]

        # add layers
        model.add(LSTM(
            input_dim=layers[0],
            output_dim=layers[1],
            return_sequences=True,
            # stateful=True,
            # batch_input_shape=(32, 49, 1)
        ))
        model.add(Dropout(0.2))
        model.add(LSTM(
            layers[2],
            return_sequences=False,
            # stateful=True,
            # batch_input_shape=(32, 49, 1)
        ))
        model.add(Dropout(0.2))
        model.add(Dense(
            output_dim=layers[3]))
        model.add(Activation("linear"))

        # save model
        print "Saving model ..."
        json_string = model.to_json()
        open(model_arch_file, 'w').write(json_string)
        model.save_weights(model_weight_file, overwrite=True)

    # compile model in both cases
    start = time.time()
    print "Started compilation: ", start
    model.compile(loss="mse", optimizer="rmsprop")
    print "Compilation Time: ", time.time() - start

    # train if using a new model
    if not USE_SAVED_MODEL:
        model.fit(
            X, Y,
            batch_size=32,
            nb_epoch=5,
            validation_split=0.05
        )

    # generate a new sequence
    model.reset_states()
    gen_seconds = 3
    generated = [None for i in range(DEFAULT_RATE * gen_seconds)]  # gen_seconds seconds of new audio

    print seed_X.shape
    for i in xrange(DEFAULT_RATE * gen_seconds):
        sys.stdout.write("\r" + str(float(i) / (DEFAULT_RATE * gen_seconds)))
        predicted = model.predict(seed_X)[0]
        generated[i] = predicted
        seed_X[0, :, 0] = np.append(seed_X[0, 1:, 0], predicted)

    return np.array(generated)
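# A hypothetical driver for generate_audio(): the sliding-window preparation of
# a synthetic waveform below is an assumption about the expected input layout
# (2-D arrays of fixed-length sequences), and DEFAULT_RATE is assumed to be
# defined at module level as in the original script.
import numpy as np

wave = np.sin(np.linspace(0, 200 * np.pi, 10000)).astype('float32')
win = 49  # assumed window length, matching the commented batch_input_shape above
X = np.array([wave[i:i + win] for i in range(len(wave) - win - 1)])
Y = wave[win:-1]
seed_X = X[-1:].copy()  # last window as the generation seed

audio = generate_audio(X, Y, seed_X)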