Example #1
	def LSTM_model_memory_batch(self, batch_size, epoch):
		# Stateful LSTM: the batch size is baked into the input shape, and the
		# state is reset manually at the end of each pass over the data.
		model = Sequential()
		model.add(LSTM(4, batch_input_shape=(batch_size, self.look_back, 1), stateful=True))
		model.add(Dense(1))
		model.compile(loss='mean_squared_error', optimizer='adam')
		for i in range(epoch):
			model.fit(self.trainX, self.trainY, epochs=1, batch_size=batch_size, verbose=2, shuffle=False)
			model.reset_states()
		return model
Example #2
def _runner(layer_class):
    """
    All the recurrent layers share the same interface,
    so we can run through them with a single function.
    """
    for ret_seq in [True, False]:
        layer = layer_class(output_dim, return_sequences=ret_seq,
                            weights=None, input_shape=(timesteps, input_dim))
        layer.input = K.variable(np.ones((nb_samples, timesteps, input_dim)))
        layer.get_config()

        for train in [True, False]:
            out = K.eval(layer.get_output(train))
            # Make sure the output has the desired shape
            if ret_seq:
                assert(out.shape == (nb_samples, timesteps, output_dim))
            else:
                assert(out.shape == (nb_samples, output_dim))

            mask = layer.get_output_mask(train)

    # check statefulness
    layer = layer_class(output_dim, return_sequences=False,
                        stateful=True,
                        weights=None,
                        batch_input_shape=(nb_samples, timesteps, input_dim))
    model = Sequential()
    model.add(layer)
    model.compile(optimizer='sgd', loss='mse')
    out1 = model.predict(np.ones((nb_samples, timesteps, input_dim)))
    assert(out1.shape == (nb_samples, output_dim))

    # train once so that the states change
    model.train_on_batch(np.ones((nb_samples, timesteps, input_dim)),
                         np.ones((nb_samples, output_dim)))
    out2 = model.predict(np.ones((nb_samples, timesteps, input_dim)))

    # if the state is not reset, output should be different
    assert(out1.max() != out2.max())

    # check that output changes after states are reset
    # (even though the model itself didn't change)
    layer.reset_states()
    out3 = model.predict(np.ones((nb_samples, timesteps, input_dim)))
    assert(out2.max() != out3.max())

    # check that container-level reset_states() works
    model.reset_states()
    out4 = model.predict(np.ones((nb_samples, timesteps, input_dim)))
    assert_allclose(out3, out4, atol=1e-5)

    # check that the call to `predict` updated the states
    out5 = model.predict(np.ones((nb_samples, timesteps, input_dim)))
    assert(out4.max() != out5.max())
Example #3
def fit_lstm(train, batch_size, nb_epoch, neurons):
    X, y = train[:, 0:-1], train[:, -1]
    X = X.reshape(X.shape[0], 1, X.shape[1])
    model = Sequential()
    model.add(LSTM(neurons, batch_input_shape=(batch_size, X.shape[1], X.shape[2]), stateful=True))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer='adam')
    for i in range(nb_epoch):
        model.fit(X, y, epochs=1, batch_size=batch_size, verbose=0, shuffle=False)
        model.reset_states()
    return model
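A minimal usage sketch (not from the original source): frame a toy series as lag-1 supervised rows, fit it with the helper above, and then walk forward one prediction at a time. The sine series, batch_size=1, and hyperparameters are assumptions for illustration, and Keras 2 style arguments are assumed.

import numpy as np

series = np.sin(np.linspace(0, 20, 200))
train = np.column_stack([series[:-1], series[1:]])      # columns: X (lag-1 value), y (next value)

model = fit_lstm(train, batch_size=1, nb_epoch=10, neurons=4)

# Walk forward over the inputs one step at a time, letting the state accumulate.
model.reset_states()
predictions = [model.predict(x.reshape(1, 1, 1), batch_size=1)[0, 0] for x in train[:, 0]]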
Example #4
def test_statefulness(layer_class):
    model = Sequential()
    model.add(embeddings.Embedding(embedding_num, embedding_dim,
                                   mask_zero=True,
                                   input_length=timesteps,
                                   batch_input_shape=(nb_samples, timesteps)))
    layer = layer_class(output_dim, return_sequences=False,
                        stateful=True,
                        weights=None)
    model.add(layer)
    model.compile(optimizer='sgd', loss='mse')
    out1 = model.predict(np.ones((nb_samples, timesteps)))
    assert(out1.shape == (nb_samples, output_dim))

    # train once so that the states change
    model.train_on_batch(np.ones((nb_samples, timesteps)),
                         np.ones((nb_samples, output_dim)))
    out2 = model.predict(np.ones((nb_samples, timesteps)))

    # if the state is not reset, output should be different
    assert(out1.max() != out2.max())

    # check that output changes after states are reset
    # (even though the model itself didn't change)
    layer.reset_states()
    out3 = model.predict(np.ones((nb_samples, timesteps)))
    assert(out2.max() != out3.max())

    # check that container-level reset_states() works
    model.reset_states()
    out4 = model.predict(np.ones((nb_samples, timesteps)))
    assert_allclose(out3, out4, atol=1e-5)

    # check that the call to `predict` updated the states
    out5 = model.predict(np.ones((nb_samples, timesteps)))
    assert(out4.max() != out5.max())

    # Check masking
    layer.reset_states()

    left_padded_input = np.ones((nb_samples, timesteps))
    left_padded_input[0, :1] = 0
    left_padded_input[1, :2] = 0
    out6 = model.predict(left_padded_input)

    layer.reset_states()

    right_padded_input = np.ones((nb_samples, timesteps))
    right_padded_input[0, -1:] = 0
    right_padded_input[1, -2:] = 0
    out7 = model.predict(right_padded_input)

    assert_allclose(out7, out6, atol=1e-5)
Example #5
    def hyper_build_model(self,space,predict,custom_batch_size=None):
        conf = self.conf
        model_conf = conf['model']
        rnn_size = model_conf['rnn_size']
        rnn_type = model_conf['rnn_type']
        regularization = model_conf['regularization']

        dropout_prob = model_conf['dropout_prob']
        length = model_conf['length']
        pred_length = model_conf['pred_length']
        skip = model_conf['skip']
        stateful = model_conf['stateful']
        return_sequences = model_conf['return_sequences']
        output_activation = conf['data']['target'].activation#model_conf['output_activation']
        num_signals = conf['data']['num_signals']


        batch_size = self.conf['training']['batch_size']
        if predict:
            batch_size = self.conf['model']['pred_batch_size']
            #so we can predict with one time point at a time!
            if return_sequences:
                length = pred_length
            else:
                length = 1

        if custom_batch_size is not None:
            batch_size = custom_batch_size

        if rnn_type == 'LSTM':
            rnn_model = LSTM
        elif rnn_type == 'SimpleRNN':
            rnn_model = SimpleRNN
        else:
            print('Unknown model type, exiting.')
            exit(1)
        
        batch_input_shape = (batch_size, length, num_signals)
        model = Sequential()

        for _ in range(model_conf['rnn_layers']):
            model.add(rnn_model(rnn_size, return_sequences=return_sequences, batch_input_shape=batch_input_shape,
                                stateful=stateful, kernel_regularizer=l2(regularization), recurrent_regularizer=l2(regularization),
                                bias_regularizer=l2(regularization), dropout=dropout_prob, recurrent_dropout=dropout_prob))
            model.add(Dropout(space['Dropout']))
        if return_sequences:
            model.add(TimeDistributed(Dense(1,activation=output_activation)))
        else:
            model.add(Dense(1,activation=output_activation))
        model.reset_states()

        return model
Example #6
def build_model(predict,batch_size,length,featurelen):
    if predict:
        batch_size = length = 1
    model = Sequential()
    model.add(LSTM(10, return_sequences=True, batch_input_shape=(batch_size, length, featurelen), stateful=True))
    model.add(Dropout(0.2))
    model.add(LSTM(10, return_sequences=True, stateful=True))
    model.add(Dropout(0.2))
    model.add(TimeDistributed(Dense(featurelen)))
    model.add(Activation('softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
    model.reset_states()
    return model
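The predict flag above exists so the same architecture can be rebuilt with batch_size = length = 1 for sampling. A hedged sketch of that pattern (assumed usage, not from the original project; Keras 2 style fit arguments): train with a fixed batch size, then copy the weights into the single-step model.

import numpy as np

batch_size, length, featurelen = 32, 20, 10
idx = np.random.randint(0, featurelen, size=(batch_size * 4, length))
X = np.eye(featurelen)[idx]                     # one-hot sequences: (samples, length, featurelen)
y = np.roll(X, -1, axis=1)                      # next-step targets for the softmax output

train_model = build_model(predict=False, batch_size=batch_size, length=length, featurelen=featurelen)
train_model.fit(X, y, batch_size=batch_size, epochs=1, shuffle=False)

predict_model = build_model(predict=True, batch_size=batch_size, length=length, featurelen=featurelen)
predict_model.set_weights(train_model.get_weights())    # same weights, batch and length of 1
predict_model.reset_states()
step = X[0, 0].reshape(1, 1, featurelen)                # feed one timestep at a time
probs = predict_model.predict(step, batch_size=1)[0, 0]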
Example #7
def fit_lstm(train, n_batch, nb_epoch, n_neurons):
    X, y = train[:, 0:-1], train[:, -1]
    X = X.reshape(X.shape[0], 1, X.shape[1])
    print('x=', X)
    model = Sequential()
    # https://keras.io/layers/recurrent/#lstm
    model.add(LSTM(n_neurons, batch_input_shape=(n_batch, X.shape[1], X.shape[2]), stateful=True))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer='adam')
    for i in range(nb_epoch):
        model.fit(X, y, epochs=1, batch_size=n_batch, verbose=0, shuffle=False)
        model.reset_states()
    return model
Example #8
def fit_lstm(train, n_lag, n_seq, n_batch, nb_epoch, n_neurons):
    # reshape training into [samples, timesteps, features]
    X, y = train[:, 0:n_lag], train[:, n_lag:]
    X = X.reshape(X.shape[0], 1, X.shape[1])
    # design network
    model = Sequential()
    model.add(LSTM(n_neurons, batch_input_shape=(n_batch, X.shape[1], X.shape[2]), stateful=True))
    model.add(Dense(y.shape[1]))
    model.compile(loss='mean_squared_error', optimizer='adam')
    # fit network
    for i in range(nb_epoch):
        model.fit(X, y, epochs=1, batch_size=n_batch, verbose=0, shuffle=False)
        model.reset_states()
    return model
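For the multi-step variant above, one might frame the series so that each row holds n_lag inputs followed by n_seq targets, forecasting the n_seq future values in a single call. The data framing and parameters below are assumptions for illustration only.

import numpy as np

series = np.sin(np.linspace(0, 30, 300))
n_lag, n_seq, n_batch = 1, 3, 1
rows = [series[i:i + n_lag + n_seq] for i in range(len(series) - n_lag - n_seq + 1)]
train = np.array(rows)                                  # n_lag input columns, then n_seq target columns

model = fit_lstm(train, n_lag, n_seq, n_batch, nb_epoch=5, n_neurons=4)

model.reset_states()
x = train[0, 0:n_lag].reshape(1, 1, n_lag)
forecast = model.predict(x, batch_size=n_batch)[0]      # n_seq predicted steps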
Example #9
def run(s):
    word_to_num, num_to_word, word_cnt = build_dict(s)
    X, y = build_xy(s, word_to_num, num_to_word, word_cnt)
    
    print(X.shape, y.shape)
    dim = len(word_to_num)

    
    print("Number of words: %d" % dim)
    print("Number of sentences: %d" % len(X))
    
    model = Sequential()
    model.add(LSTM(output_dim=50, input_dim=dim, activation='sigmoid', inner_activation='hard_sigmoid'))
    # model.add(Dropout(0.5))
    model.add(Dense(200))
    model.add(Activation('sigmoid'))
    
    model.compile(loss='binary_crossentropy',
                  optimizer='rmsprop',
                  metrics=['accuracy'])
    
    #model.summary()
    
    model.fit(X, y, batch_size=200, nb_epoch=10)
    
    N = 200 # num of sents to be generated
    
    for i in range(N):
        model.reset_states()
        k = start_token
        res = [k]
        while k != end_token:
            k_oh = num_to_onehot(k, dim)
            kk_oh = model.predict(k_oh)
            kk = onehot_to_num(kk_oh, dim)
            k = kk
            res.append(k)
        s = ""
        for c in res:
            s = s + num_to_word(c)
        print(s)
Example #10
class LSTM_RNN:

    def __init__(self, look_back, dropout_probability=0.2, init='he_uniform', loss='mse', optimizer='rmsprop'):
        self.rnn = Sequential()
        self.look_back = look_back
        self.rnn.add(LSTM(10, stateful=True, batch_input_shape=(1, 1, 1), init=init))
        self.rnn.add(Dropout(dropout_probability))
        self.rnn.add(Dense(1, init=init))
        self.rnn.compile(loss=loss, optimizer=optimizer)

    def batch_train_test(self, trainX, trainY, testX, testY, nb_epoch=150):
        print('Training LSTM-RNN...')
        for epoch in range(nb_epoch):
            print('Epoch '+ str(epoch+1) +'/{}'.format(nb_epoch))
            training_losses = []
            testing_losses = []
            for i in range(len(trainX)):
                y_actual = trainY[i]
                for j in range(self.look_back):
                    training_loss = self.rnn.train_on_batch(np.expand_dims(np.expand_dims(trainX[i][j], axis=1), axis=1),
                                                       np.array([y_actual]))
                    training_losses.append(training_loss)
                self.rnn.reset_states()

            print('Mean training loss = {}'.format(np.mean(training_losses)))

            mean_testing_loss = []
            for i in range(len(testX)):
                for j in range(self.look_back):
                    testing_loss = self.rnn.test_on_batch(np.expand_dims(np.expand_dims(testX[i][j], axis=1), axis=1),
                                                          np.array([testY[i]]))
                    testing_losses.append(testing_loss)
                self.rnn.reset_states()

                for j in range(self.look_back):
                    y_pred = self.rnn.predict_on_batch(np.expand_dims(np.expand_dims(testX[i][j], axis=1), axis=1))
                self.rnn.reset_states()

            mean_testing_loss = np.mean(testing_losses)
            print('Mean testing loss = {}'.format(mean_testing_loss))
        return mean_testing_loss
Example #11
def test_convolutional_recurrent():
    num_row = 3
    num_col = 3
    filters = 5
    num_samples = 2
    input_channel = 2
    input_num_row = 5
    input_num_col = 5
    sequence_len = 2
    for data_format in ['channels_first', 'channels_last']:

        if data_format == 'channels_first':
            inputs = np.random.rand(num_samples, sequence_len,
                                    input_channel,
                                    input_num_row, input_num_col)
        else:
            inputs = np.random.rand(num_samples, sequence_len,
                                    input_num_row, input_num_col,
                                    input_channel)

        for return_sequences in [True, False]:
            # test for output shape:
            output = layer_test(convolutional_recurrent.ConvLSTM2D,
                                kwargs={'data_format': data_format,
                                        'return_sequences': return_sequences,
                                        'filters': filters,
                                        'kernel_size': (num_row, num_col),
                                        'padding': 'valid'},
                                input_shape=inputs.shape)

            # No need to check following tests for both data formats
            if data_format == 'channels_first' or return_sequences:
                continue

            # Tests for statefulness
            model = Sequential()
            kwargs = {'data_format': data_format,
                      'return_sequences': return_sequences,
                      'filters': filters,
                      'kernel_size': (num_row, num_col),
                      'stateful': True,
                      'batch_input_shape': inputs.shape,
                      'padding': 'same'}
            layer = convolutional_recurrent.ConvLSTM2D(**kwargs)

            model.add(layer)
            model.compile(optimizer='sgd', loss='mse')
            out1 = model.predict(np.ones_like(inputs))

            # train once so that the states change
            model.train_on_batch(np.ones_like(inputs),
                                 np.random.random(out1.shape))
            out2 = model.predict(np.ones_like(inputs))

            # if the state is not reset, output should be different
            assert(out1.max() != out2.max())

            # check that output changes after states are reset
            # (even though the model itself didn't change)
            layer.reset_states()
            out3 = model.predict(np.ones_like(inputs))
            assert(out2.max() != out3.max())

            # check that container-level reset_states() works
            model.reset_states()
            out4 = model.predict(np.ones_like(inputs))
            assert_allclose(out3, out4, atol=1e-5)

            # check that the call to `predict` updated the states
            out5 = model.predict(np.ones_like(inputs))
            assert(out4.max() != out5.max())

            # check regularizers
            kwargs = {'data_format': data_format,
                      'return_sequences': return_sequences,
                      'kernel_size': (num_row, num_col),
                      'stateful': True,
                      'filters': filters,
                      'batch_input_shape': inputs.shape,
                      'kernel_regularizer': regularizers.L1L2(l1=0.01),
                      'recurrent_regularizer': regularizers.L1L2(l1=0.01),
                      'bias_regularizer': 'l2',
                      'activity_regularizer': 'l2',
                      'kernel_constraint': 'max_norm',
                      'recurrent_constraint': 'max_norm',
                      'bias_constraint': 'max_norm',
                      'padding': 'same'}

            layer = convolutional_recurrent.ConvLSTM2D(**kwargs)
            layer.build(inputs.shape)
            assert len(layer.losses) == 3
            assert layer.activity_regularizer
            output = layer(K.variable(np.ones(inputs.shape)))
            assert len(layer.losses) == 4
            K.eval(output)

            # check dropout
            layer_test(convolutional_recurrent.ConvLSTM2D,
                       kwargs={'data_format': data_format,
                               'return_sequences': return_sequences,
                               'filters': filters,
                               'kernel_size': (num_row, num_col),
                               'padding': 'same',
                               'dropout': 0.1,
                               'recurrent_dropout': 0.1},
                       input_shape=inputs.shape)

            # check state initialization
            layer = convolutional_recurrent.ConvLSTM2D(filters=filters,
                                                       kernel_size=(num_row, num_col),
                                                       data_format=data_format,
                                                       return_sequences=return_sequences)
            layer.build(inputs.shape)
            x = Input(batch_shape=inputs.shape)
            initial_state = layer.get_initial_state(x)
            y = layer(x, initial_state=initial_state)
            model = Model(x, y)
            assert model.predict(inputs).shape == layer.compute_output_shape(inputs.shape)
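For reference, a stand-alone sketch of the stateful ConvLSTM2D pattern that the test above exercises (assuming the Keras 2 API and 'channels_last' data; the shapes are illustrative only):

import numpy as np
from keras.models import Sequential
from keras.layers import ConvLSTM2D

samples, steps, rows, cols, channels = 2, 2, 5, 5, 2
model = Sequential()
model.add(ConvLSTM2D(filters=5, kernel_size=(3, 3), padding='same',
                     stateful=True, return_sequences=False,
                     batch_input_shape=(samples, steps, rows, cols, channels)))
model.compile(optimizer='sgd', loss='mse')

x = np.random.rand(samples, steps, rows, cols, channels)
out_a = model.predict(x)      # the call itself updates the layer's internal state
out_b = model.predict(x)      # so the same input now gives a different output
model.reset_states()          # clear the state before starting a new sequence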
Example #12
def test_recurrent_convolutional():
    nb_row = 3
    nb_col = 3
    nb_filter = 5
    nb_samples = 2
    input_channel = 2
    input_nb_row = 5
    input_nb_col = 5
    sequence_len = 2
    for dim_ordering in ['th', 'tf']:

        if dim_ordering == 'th':
            input = np.random.rand(nb_samples, sequence_len,
                                   input_channel,
                                   input_nb_row, input_nb_col)
        else:  # tf
            input = np.random.rand(nb_samples, sequence_len,
                                   input_nb_row, input_nb_col,
                                   input_channel)

        for return_sequences in [True, False]:
            # test for output shape:
            output = layer_test(convolutional_recurrent.ConvLSTM2D,
                                kwargs={'dim_ordering': dim_ordering,
                                        'return_sequences': return_sequences,
                                        'nb_filter': nb_filter,
                                        'nb_row': nb_row,
                                        'nb_col': nb_col,
                                        'border_mode': "same"},
                                input_shape=input.shape)

            output_shape = [nb_samples, input_nb_row, input_nb_col]

            if dim_ordering == 'th':
                output_shape.insert(1, nb_filter)
            else:
                output_shape.insert(3, nb_filter)

            if return_sequences:
                output_shape.insert(1, sequence_len)

            assert output.shape == tuple(output_shape)

            # No need to check statefulness for both
            if dim_ordering == 'th' or return_sequences:
                continue

            # Tests for statefulness
            model = Sequential()
            kwargs = {'dim_ordering': dim_ordering,
                      'return_sequences': return_sequences,
                      'nb_filter': nb_filter,
                      'nb_row': nb_row,
                      'nb_col': nb_col,
                      'stateful': True,
                      'batch_input_shape': input.shape,
                      'border_mode': "same"}
            layer = convolutional_recurrent.ConvLSTM2D(**kwargs)

            model.add(layer)
            model.compile(optimizer='sgd', loss='mse')
            out1 = model.predict(np.ones_like(input))
            assert(out1.shape == tuple(output_shape))

            # train once so that the states change
            model.train_on_batch(np.ones_like(input),
                                 np.ones_like(output))
            out2 = model.predict(np.ones_like(input))

            # if the state is not reset, output should be different
            assert(out1.max() != out2.max())

            # check that output changes after states are reset
            # (even though the model itself didn't change)
            layer.reset_states()
            out3 = model.predict(np.ones_like(input))
            assert(out2.max() != out3.max())

            # check that container-level reset_states() works
            model.reset_states()
            out4 = model.predict(np.ones_like(input))
            assert_allclose(out3, out4, atol=1e-5)

            # check that the call to `predict` updated the states
            out5 = model.predict(np.ones_like(input))
            assert(out4.max() != out5.max())

            # check regularizers
            kwargs = {'dim_ordering': dim_ordering,
                      'return_sequences': return_sequences,
                      'nb_filter': nb_filter,
                      'nb_row': nb_row,
                      'nb_col': nb_col,
                      'stateful': True,
                      'batch_input_shape': input.shape,
                      'W_regularizer': regularizers.WeightRegularizer(l1=0.01),
                      'U_regularizer': regularizers.WeightRegularizer(l1=0.01),
                      'b_regularizer': 'l2',
                      'border_mode': "same"}

            layer = convolutional_recurrent.ConvLSTM2D(**kwargs)
            layer.set_input(K.variable(np.ones(input.shape)),
                            shape=input.shape)
            K.eval(layer.output)

            # check dropout
            layer_test(convolutional_recurrent.ConvLSTM2D,
                       kwargs={'dim_ordering': dim_ordering,
                               'return_sequences': return_sequences,
                               'nb_filter': nb_filter,
                               'nb_row': nb_row,
                               'nb_col': nb_col,
                               'border_mode': "same",
                               'dropout_W': 0.1,
                               'dropout_U': 0.1},
                       input_shape=input.shape)
Example #13
	def build_model(self,predict,custom_batch_size=None):
		conf = self.conf
		model_conf = conf['model']
		rnn_size = model_conf['rnn_size']
		rnn_type = model_conf['rnn_type']
		optimizer = model_conf['optimizer']
		lr = model_conf['lr']
		clipnorm = model_conf['clipnorm']
		regularization = model_conf['regularization']

		if optimizer == 'sgd':
			optimizer_class = SGD
		elif optimizer == 'adam':
			optimizer_class = Adam
		elif optimizer == 'rmsprop':
			optimizer_class = RMSprop
		elif optimizer == 'nadam':
			optimizer_class = Nadam
		else:
			# assume a ready-made optimizer instance was passed in
			optimizer_class = None

		if optimizer_class is not None and (lr is not None or clipnorm is not None):
			optimizer = optimizer_class(lr=lr, clipnorm=clipnorm)

		loss_fn = conf['data']['target'].loss#model_conf['loss']
		dropout_prob = model_conf['dropout_prob']
		length = model_conf['length']
		pred_length = model_conf['pred_length']
		skip = model_conf['skip']
		stateful = model_conf['stateful']
		return_sequences = model_conf['return_sequences']
		output_activation = conf['data']['target'].activation#model_conf['output_activation']
		num_signals = conf['data']['num_signals']


		batch_size = self.conf['training']['batch_size']
		if predict:
			batch_size = self.conf['model']['pred_batch_size']
			# so we can predict with one time point at a time!
			if return_sequences:
				length = pred_length
			else:
				length = 1

		if custom_batch_size is not None:
			batch_size = custom_batch_size

		if rnn_type == 'LSTM':
			rnn_model = LSTM
		elif rnn_type == 'SimpleRNN':
			rnn_model = SimpleRNN
		else:
			print('Unknown model type, exiting.')
			exit(1)
		
		batch_input_shape = (batch_size, length, num_signals)
		model = Sequential()
		# model.add(TimeDistributed(Dense(num_signals,bias=True),batch_input_shape=batch_input_shape))
		for _ in range(model_conf['rnn_layers']):
			model.add(rnn_model(rnn_size, return_sequences=return_sequences,batch_input_shape=batch_input_shape,
			 stateful=stateful,W_regularizer=l2(regularization),U_regularizer=l2(regularization),
			 b_regularizer=l2(regularization),dropout_W=dropout_prob,dropout_U=dropout_prob))
			model.add(Dropout(dropout_prob))
		if return_sequences:
			model.add(TimeDistributed(Dense(1,activation=output_activation)))
		else:
			model.add(Dense(1,activation=output_activation))
		model.compile(loss=loss_fn, optimizer=optimizer)
		model.reset_states()
		#model.compile(loss='mean_squared_error', optimizer='sgd') #for numerical output
		return model
Example #14
def _runner(layer_class):
    """
    All the recurrent layers share the same interface,
    so we can run through them with a single function.
    """
    for ret_seq in [True, False]:
        layer = layer_class(output_dim, return_sequences=ret_seq, weights=None, input_shape=(timesteps, embedding_dim))
        layer.input = K.variable(np.ones((nb_samples, timesteps, embedding_dim)))
        layer.get_config()

        for train in [True, False]:
            out = K.eval(layer.get_output(train))
            # Make sure the output has the desired shape
            if ret_seq:
                assert out.shape == (nb_samples, timesteps, output_dim)
            else:
                assert out.shape == (nb_samples, output_dim)

            mask = layer.get_output_mask(train)

    # check statefulness
    model = Sequential()
    model.add(
        embeddings.Embedding(
            embedding_num,
            embedding_dim,
            mask_zero=True,
            input_length=timesteps,
            batch_input_shape=(nb_samples, timesteps),
        )
    )
    layer = layer_class(output_dim, return_sequences=False, stateful=True, weights=None)
    model.add(layer)
    model.compile(optimizer="sgd", loss="mse")
    out1 = model.predict(np.ones((nb_samples, timesteps)))
    assert out1.shape == (nb_samples, output_dim)

    # train once so that the states change
    model.train_on_batch(np.ones((nb_samples, timesteps)), np.ones((nb_samples, output_dim)))
    out2 = model.predict(np.ones((nb_samples, timesteps)))

    # if the state is not reset, output should be different
    assert out1.max() != out2.max()

    # check that output changes after states are reset
    # (even though the model itself didn't change)
    layer.reset_states()
    out3 = model.predict(np.ones((nb_samples, timesteps)))
    assert out2.max() != out3.max()

    # check that container-level reset_states() works
    model.reset_states()
    out4 = model.predict(np.ones((nb_samples, timesteps)))
    assert_allclose(out3, out4, atol=1e-5)

    # check that the call to `predict` updated the states
    out5 = model.predict(np.ones((nb_samples, timesteps)))
    assert out4.max() != out5.max()

    # Check masking
    layer.reset_states()

    left_padded_input = np.ones((nb_samples, timesteps))
    left_padded_input[0, :1] = 0
    left_padded_input[1, :2] = 0
    left_padded_input[2, :3] = 0
    out6 = model.predict(left_padded_input)

    layer.reset_states()

    right_padded_input = np.ones((nb_samples, timesteps))
    right_padded_input[0, -1:] = 0
    right_padded_input[1, -2:] = 0
    right_padded_input[2, -3:] = 0
    out7 = model.predict(right_padded_input)

    assert_allclose(out7, out6, atol=1e-5)
Example #15
def lstm_model(
        data, hidden_layer_neurons, epochs, batch_size=1,
        feature_dimensions=1, verbose=False):
    """Build an LSTM model.

    Args:
        data: Data frame of X, Y values
        hidden_layer_neurons: Number of neurons per layers
        epochs: Number of iterations for learning
        batch_size: Number of samples per gradient update
        feature_dimensions: Dimension of features (Number of rows per feature)

    Returns:
        model: Graph of LSTM model

    """
    # Initialize key variables
    start = time.time()

    # Process the data for fitting
    x_values, y_values = data[:, 0: -1], data[:, -1]
    x_shaped = x_values.reshape(x_values.shape[0], 1, x_values.shape[1])

    # Let's do some learning!
    model = Sequential()

    '''
    The Long Short-Term Memory network (LSTM) is a type of Recurrent Neural
    Network (RNN).

    A benefit of this type of network is that it can learn and remember over
    long sequences and does not rely on a pre-specified window lagged
    observation as input.

    In Keras, this is referred to as being "stateful", and involves setting the
    "stateful" argument to "True" when defining an LSTM layer.

    By default, an LSTM layer in Keras maintains state between data within
    one batch. A batch of data is a fixed-sized number of rows from the
    training dataset that defines how many patterns (sequences) to process
    before updating the weights of the network.

    A state is:
        Where am I now inside a sequence? Which time step is it? How is this
        particular sequence behaving since its beginning up to now?

    A weight is: What do I know about the general behavior of all sequences
        I've seen so far?

    By default, the state in the LSTM layer is cleared between batches. This is
    undesirable here, so we make the LSTM stateful. Statefulness gives us
    fine-grained control over when the state of the LSTM layer is cleared: we
    call the reset_states() function ourselves between calls to model.fit().

    LSTM networks can be stacked in Keras in the same way that other layer
    types can be stacked. One addition to the configuration that is required
    is that an LSTM layer prior to each subsequent LSTM layer must return the
    sequence. This can be done by setting the return_sequences parameter on
    the layer to True.

    batch_size denotes the subset of the training sample (e.g. 100 rows out of
    1000) used for each weight update during learning. Batches are processed in
    order, each one building on the weights updated by the previous batch.

    return_sequences indicates whether a recurrent layer should return its
    entire output sequence (i.e. a sequence of vectors of a fixed dimension)
    to the next layer, or only its last output, which is a single vector of
    the same dimension. Intermediate layers in a stacked RNN must return
    sequences; the final recurrent layer usually does not.

    batch_input_shape fixes the exact batch size the network will accept, so
    variable-sized batches are rejected. It is widely used in stacked, stateful
    LSTM networks and is a tuple of (batch_size, timesteps, data_dimension).
    '''
    timesteps = x_shaped.shape[1]
    data_dimension = x_shaped.shape[2]

    # Add layers to the model
    model.add(
        LSTM(
            units=hidden_layer_neurons,
            batch_input_shape=(batch_size, timesteps, data_dimension),
            return_sequences=True,
            stateful=True
        )
    )
    model.add(Dropout(0.2))

    model.add(
        LSTM(
            units=hidden_layer_neurons,
            batch_input_shape=(batch_size, timesteps, data_dimension),
            return_sequences=False,
            stateful=True
        )
    )
    model.add(Dropout(0.2))

    model.add(
        Dense(
            units=feature_dimensions
        )
    )
    # model.add(Activation('linear'))

    '''
    Once the network is specified, it must be compiled into an efficient
    symbolic representation using a backend mathematical library,
    such as TensorFlow.

    In compiling the network, we must specify a loss function and optimization
    algorithm. We use "mean_squared_error" ("mse") as the loss function, since
    it closely matches the RMSE we are ultimately interested in, together with
    the efficient ADAM optimization algorithm.
    '''
    model.compile(loss='mse', optimizer='adam', metrics=['accuracy'])

    '''
    Once the model is compiled, the network can be fit to the training data.
    Because the network is stateful, we must control when the internal state
    is reset. Therefore, we must manually manage the training process one epoch
    at a time across the desired number of epochs.

    By default, the samples within an epoch are shuffled prior to being exposed
    to the network. Again, this is undesirable for the LSTM because we want the
    network to build up state as it learns across the sequence of observations.
    We can disable the shuffling of samples by setting "shuffle" to "False".
    '''
    for _ in range(epochs):
        model.fit(
            x_shaped,
            y_values,
            batch_size=batch_size,
            shuffle=False,
            epochs=1,
            verbose=verbose,
            validation_split=0.05)

        '''
        When the fit process reaches the total length of the samples,
        model.reset_states() is called to reset the internal state at the end
        of the training epoch, ready for the next training iteration.

        The next iteration starts training from the beginning of the dataset,
        so the state must be reset: the previous state is only relevant to the
        epoch that has just finished.
        '''
        model.reset_states()

    print('\n> Training Time: {:20.2f}'.format(time.time() - start))
    return model
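A hedged sketch of using the stateful model returned above for one-step-ahead prediction. The sine series, lag-1 framing, and batch_size=1 are assumptions for illustration; the batch size passed to predict() must match the batch_input_shape the model was built with.

import numpy as np

series = np.sin(np.linspace(0, 20, 101))
data = np.column_stack([series[:-1], series[1:]])       # rows of [X, Y] with a lag of one step

model = lstm_model(data, hidden_layer_neurons=8, epochs=5, batch_size=1)

model.reset_states()                                    # replay the series from a clean state
predictions = []
for value in data[:, 0]:
    x = value.reshape(1, 1, 1)                          # (batch_size, timesteps, data_dimension)
    predictions.append(model.predict(x, batch_size=1)[0, 0])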
Example #16
File: dkt.py Project: asmith26/dkt
def main():
    parser = argparse.ArgumentParser(description='Process some integers.')
    parser.add_argument('--dataset', type=str, help='Dataset file', required=True)
    parser.add_argument('--splitfile', type=str, help='Split file', required=True)
    parser.add_argument('--hiddenunits', type=int, help='Number of LSTM hidden units.', 
                        default=200, required=False)
    parser.add_argument('--batchsize', type=int, help='Number of sequences to process in a batch.',
                        default=5, required=False)
    parser.add_argument('--timewindow', type=int, help='Number of timesteps to process in a batch.',
                        default=100, required=False)
    parser.add_argument('--epochs', type=int, help='Number of epochs.',
                        default=50, required=False)
    args = parser.parse_args()
    
    dataset = args.dataset
    split_file = args.splitfile
    hidden_units = args.hiddenunits
    batch_size = args.batchsize
    time_window = args.timewindow
    epochs = args.epochs
    
    model_file = dataset + '.model_weights'
    history_file = dataset + '.history'
    preds_file = dataset + '.preds'
    
    overall_loss = [0.0]
    preds = []
    history = []
    
    # load dataset
    training_seqs, testing_seqs, num_skills = load_dataset(dataset, split_file)
    print "Training Sequences: %d" % len(training_seqs)
    print "Testing Sequences: %d" % len(testing_seqs)
    print "Number of skills: %d" % num_skills
    
    # Our loss function
    # The model gives predictions for all skills so we need to get the 
    # prediction for the skill at time t. We do that by taking the column-wise
    # dot product between the predictions at each time slice and a
    # one-hot encoding of the skill at time t.
    # y_true: (nsamples x nsteps x nskills+1)
    # y_pred: (nsamples x nsteps x nskills)
    def loss_function(y_true, y_pred):
        skill = y_true[:,:,0:num_skills]
        obs = y_true[:,:,num_skills]
        rel_pred = Th.sum(y_pred * skill, axis=2)
        
        # keras implementation does a mean on the last dimension (axis=-1) which
        # it assumes is a singleton dimension. But in our context that would
        # be wrong.
        return K.binary_crossentropy(rel_pred, obs)
    
    
    # build model
    model = Sequential()
    
    # ignore padding
    model.add(Masking(-1.0, batch_input_shape=(batch_size, time_window, num_skills*2)))
    
    # lstm configured to keep states between batches
    model.add(LSTM(input_dim = num_skills*2, 
                   output_dim = hidden_units, 
                   return_sequences=True,
                   batch_input_shape=(batch_size, time_window, num_skills*2),
                   stateful = True
    ))
    
    # readout layer. TimeDistributedDense uses the same weights for all
    # time steps.
    model.add(TimeDistributedDense(input_dim = hidden_units, 
        output_dim = num_skills, activation='sigmoid'))
    
    # optimize with rmsprop which dynamically adapts the learning
    # rate of each weight.
    model.compile(loss=loss_function,
                optimizer='rmsprop',class_mode="binary")

    # training function
    def trainer(X, Y):
        overall_loss[0] += model.train_on_batch(X, Y)[0]
    
    # prediction
    def predictor(X, Y):
        batch_activations = model.predict_on_batch(X)
        skill = Y[:,:,0:num_skills]
        obs = Y[:,:,num_skills]
        y_pred = np.squeeze(np.array(batch_activations))
        
        rel_pred = np.sum(y_pred * skill, axis=2)
        
        for b in xrange(0, X.shape[0]):
            for t in xrange(0, X.shape[1]):
                if X[b, t, 0] == -1.0:
                    continue
                preds.append((rel_pred[b][t], obs[b][t]))
        
    # call when prediction batch is finished
    # resets LSTM state because we are done with all sequences in the batch
    def finished_prediction_batch(percent_done):
        model.reset_states()
        
    # similar to the above
    def finished_batch(percent_done):
        print "(%4.3f %%) %f" % (percent_done, overall_loss[0])
        model.reset_states()
        
    # run the model
    for e in xrange(0, epochs):
        model.reset_states()
        
        # train
        run_func(training_seqs, num_skills, trainer, batch_size, time_window, finished_batch)
        
        model.reset_states()
        
        # test
        run_func(testing_seqs, num_skills, predictor, batch_size, time_window, finished_prediction_batch)
        
        # compute AUC
        auc = roc_auc_score([p[1] for p in preds], [p[0] for p in preds])
        
        # log
        history.append((overall_loss[0], auc))
        
        # save model
        model.save_weights(model_file, overwrite=True)
        print "==== Epoch: %d, Test AUC: %f" % (e, auc)
        
        # reset loss
        overall_loss[0] = 0.0
        
        # save predictions
        with open(preds_file, 'w') as f:
            f.write('was_heldout\tprob_recall\tstudent_recalled\n')
            for pred in preds:
                f.write('1\t%f\t%d\n' % (pred[0], pred[1]))
        
        with open(history_file, 'w') as f:
            for h in history:
                f.write('\t'.join([str(he) for he in h]))
                f.write('\n')
                
        # clear preds
        preds = []
Example #17
	#Save weights
	name = 'neumonia_dataset_interson_keras_alldata_{0}_weights_cnn_{0}.h5'.format(number_db)
	print(name)
	model.save_weights(name,overwrite=True)

	#l = h5py.File("loss_history_{0}.hdf5".format(number_db), "w")
	#dset = f.create_dataset("loss_history_{0}".format(number_db), (100,), dtype='i')


	# A way to open a model with weights in the same architecture
	'''
	json_string = model.to_json()
	open('my_model_architecture.json', 'w').write(json_string)
	model.save_weights('my_model_weights.h5')
	'''
	import cPickle
	f = open('cnn_loss_{0}.pkl'.format(number_db),'wb')
	cPickle.dump(history.losses,f,protocol=cPickle.HIGHEST_PROTOCOL)
	f.close()
	
	h = open('cnn_metrics_{0}.pkl'.format(number_db),'wb')
	p = [sensitivity, specificity, F1, mcc]
	cPickle.dump(p ,h,protocol=cPickle.HIGHEST_PROTOCOL)
	h.close()
	model.reset_states()
	#import matplotlib.pylab as plt
	#plt.plot(history.losses,'bo')
	#plt.xlabel('Iteration')
	#plt.ylabel('Binary Cross Entropy')
	#plt.show()
Example #18
def generate_lstm_gmm(seq, maxlen=1, bs=500, ep=2, output_iterations=10, num_mixture_components=3):
    # seq is a single sample, in the format (timesteps, features) !
    # TODO: expand code to support multiple samples, fed into model together as a batch
    # Cut the timeseries data (variable name 'seq') into semi-redundant sequence chunks of maxlen

    X = []
    y = []

    for i in range(0, len(seq) - maxlen):
        X.append(seq[i:i+maxlen])
        y.append(seq[i+maxlen])

    dim = len((X[0][0]))

    print("sequence chunks:", len(X))
    print("chunk width:", len(X[0]))
    print("vector dimension:", dim)
    print("number of mixture components:", num_mixture_components)
    print("batch size:", bs)

    X = np.array(X)
    y = np.array(y)
    
    # build the model: 2 stacked LSTM
    print('Build model...')
    model = Sequential()
    model.reset_states()
    model.add(LSTM((dim+2) * num_mixture_components, return_sequences=False, input_shape=(maxlen, dim)))
    model.add(Dense((dim+2) * num_mixture_components))
    
    model.add(GMMActivation(num_mixture_components))

    model.compile(loss=gmm_loss, optimizer=RMSprop(lr=0.001))

    # Train the model
    model.fit(X, y, batch_size=bs, nb_epoch=ep)

    # Generate timeseries
    x_seed = X[len(X)-1] #choose final in-sample data point to initialize model
    x_array = []
    x_array.append(x_seed)
    x = np.array(x_array)

    predicted = []
    for i in range(output_iterations):
        pred_parameters = model.predict_on_batch(x)[0]

        means = pred_parameters[:num_mixture_components * dim]
        sds = pred_parameters[(num_mixture_components * dim):(num_mixture_components * (dim+1))]
        weights = pred_parameters[(num_mixture_components * (dim + 1)):]

        print(means)
        print(sds)
        print(weights)

        means = means.reshape(num_mixture_components, dim)
        sds = sds[:, np.newaxis]
        weights = weights[:, np.newaxis]
        
        pred = weights * np.random.normal(means, sds)
        pred = np.sum(pred, axis=0)
        predicted.append(pred)

    return predicted
Example #19
def _runner(layer_class):
    """
    All the recurrent layers share the same interface,
    so we can run through them with a single function.
    """
    # check return_sequences
    layer_test(layer_class,
               kwargs={'output_dim': output_dim,
                       'return_sequences': True},
               input_shape=(3, 2, 3))

    # check dropout
    layer_test(layer_class,
               kwargs={'output_dim': output_dim,
                       'dropout_U': 0.1,
                       'dropout_W': 0.1},
               input_shape=(3, 2, 3))

    # check implementation modes
    for mode in ['cpu', 'mem', 'gpu']:
        layer_test(layer_class,
                   kwargs={'output_dim': output_dim,
                           'consume_less': mode},
                   input_shape=(3, 2, 3))

    # check statefulness
    model = Sequential()
    model.add(embeddings.Embedding(embedding_num, embedding_dim,
                                   mask_zero=True,
                                   input_length=timesteps,
                                   batch_input_shape=(nb_samples, timesteps)))
    layer = layer_class(output_dim, return_sequences=False,
                        stateful=True,
                        weights=None)
    model.add(layer)
    model.compile(optimizer='sgd', loss='mse')
    out1 = model.predict(np.ones((nb_samples, timesteps)))
    assert(out1.shape == (nb_samples, output_dim))

    # train once so that the states change
    model.train_on_batch(np.ones((nb_samples, timesteps)),
                         np.ones((nb_samples, output_dim)))
    out2 = model.predict(np.ones((nb_samples, timesteps)))

    # if the state is not reset, output should be different
    assert(out1.max() != out2.max())

    # check that output changes after states are reset
    # (even though the model itself didn't change)
    layer.reset_states()
    out3 = model.predict(np.ones((nb_samples, timesteps)))
    assert(out2.max() != out3.max())

    # check that container-level reset_states() works
    model.reset_states()
    out4 = model.predict(np.ones((nb_samples, timesteps)))
    assert_allclose(out3, out4, atol=1e-5)

    # check that the call to `predict` updated the states
    out5 = model.predict(np.ones((nb_samples, timesteps)))
    assert(out4.max() != out5.max())

    # Check masking
    layer.reset_states()

    left_padded_input = np.ones((nb_samples, timesteps))
    left_padded_input[0, :1] = 0
    left_padded_input[1, :2] = 0
    left_padded_input[2, :3] = 0
    out6 = model.predict(left_padded_input)

    layer.reset_states()

    right_padded_input = np.ones((nb_samples, timesteps))
    right_padded_input[0, -1:] = 0
    right_padded_input[1, -2:] = 0
    right_padded_input[2, -3:] = 0
    out7 = model.predict(right_padded_input)

    assert_allclose(out7, out6, atol=1e-5)

    # check regularizers
    layer = layer_class(output_dim, return_sequences=False, weights=None,
                        batch_input_shape=(nb_samples, timesteps, embedding_dim),
                        W_regularizer=regularizers.WeightRegularizer(l1=0.01),
                        U_regularizer=regularizers.WeightRegularizer(l1=0.01),
                        b_regularizer='l2')
    shape = (nb_samples, timesteps, embedding_dim)
    layer.set_input(K.variable(np.ones(shape)),
                    shape=shape)
    K.eval(layer.output)
Example #20
File: main.py Project: cy94/ml2
def generate_audio(X, Y, seed_X):
	"""
	X: array of input sequences 
	Y: next value for each input sequence
	seed_X: a single sequence to use as seed for generation
	"""
	# reshape to input format needed for the NN
	X = np.reshape(X, (X.shape[0], X.shape[1], 1))
	seed_X = np.reshape(seed_X, (seed_X.shape[0], seed_X.shape[1], 1))

	# train new model or use pre trained model?
	USE_SAVED_MODEL = False
	model_arch_file = 'model_architecture.json'
	model_weight_file = 'model_weights.h5'

	print "Architecture file:", model_arch_file
	print "Weight file:", model_weight_file

	model = None

	if USE_SAVED_MODEL:
		print "Loading model ..."
		model = model_from_json(open(model_arch_file).read())
		model.load_weights(model_weight_file)
	else:
		model = Sequential()
		layers = [1, 10, 20, 1]

		# add layers
		model.add(LSTM(
	            input_dim=layers[0],
	            output_dim=layers[1],
	            return_sequences=True,
	            # stateful=True,
	            # batch_input_shape=(32, 49, 1)
	            ))
		model.add(Dropout(0.2))

		model.add(LSTM(
	            layers[2],
	            return_sequences=False,
	            # stateful=True,
	            # batch_input_shape=(32, 49, 1)
	            ))
		model.add(Dropout(0.2))

		model.add(Dense(
	            output_dim=layers[3]))
		model.add(Activation("linear"))


		# save model
		print "Saving model ..."
		json_string = model.to_json()
		open(model_arch_file, 'w').write(json_string)
		model.save_weights(model_weight_file, overwrite=True)

	# compile model in both cases
	start = time.time()
	print "Started compilation: ", start
	model.compile(loss="mse", optimizer="rmsprop")
	print "Compilation Time: ", time.time() - start

	# train if using new model
	if not USE_SAVED_MODEL:
		# train
		model.fit(X, Y, 
			batch_size=32, 
			nb_epoch=5,
			validation_split=0.05
			)

	# generate new sequence
	model.reset_states()
	
	gen_seconds = 3
	generated = [None] * (DEFAULT_RATE * gen_seconds)

	# generate gen_seconds seconds of new audio
	print seed_X.shape
	for i in xrange(DEFAULT_RATE * gen_seconds):
		sys.stdout.write("\r" + str(float(i)/(DEFAULT_RATE * gen_seconds)))
		predicted = model.predict(seed_X)[0]
		generated[i] = predicted
		seed_X[0,:,0] = np.append(seed_X[0,1:,0], predicted)
	
	return np.array(generated)