Ejemplo n.º 1
0
    def testIrisRNN(self):
        """Smoke-test construction of RNN estimators over a tiny text corpus.

        Six short sentences are run through a ``VocabularyProcessor`` and the
        result is used only for the invalid-cell-type check. The estimators
        are merely constructed, never fitted. Calling ``_model_fn`` on a
        classifier with an unknown ``cell_type`` is expected to raise
        ``ValueError``.
        """
        import numpy as np

        max_document_length = 6
        embedding_size = 10
        sentences = [
            "I can do this", "I believe myself", "I am okay", "Not good, man",
            "Bad mood today", "Feeling sick now"
        ]
        labels = [1, 1, 1, 0, 0, 0]

        vocab = skflow.preprocessing.VocabularyProcessor(max_document_length)
        # list() first: fit_transform's result is materialised before it is
        # handed to np.array, exactly as in the original test.
        word_ids = np.array(list(vocab.fit_transform(sentences)))
        vocab_size = len(vocab.vocabulary_)

        def input_op_fn(X):
            # Embed the input, then split it along dimension 1 into
            # max_document_length pieces via split_squeeze.
            embedded = skflow.ops.categorical_variable(
                X,
                n_classes=vocab_size,
                embedding_size=embedding_size,
                name='words')
            return skflow.ops.split_squeeze(1, max_document_length, embedded)

        random.seed(42)
        # Only declare them for now
        # TODO: Add test case once we have data set in the repo
        for cell_type in ('gru', 'rnn', 'lstm'):
            skflow.TensorFlowRNNClassifier(rnn_size=5,
                                           cell_type=cell_type,
                                           input_op_fn=input_op_fn,
                                           n_classes=3)

        # Both the construction and the _model_fn call sit inside the context
        # manager, so a ValueError from either one satisfies the assertion.
        with self.assertRaises(ValueError):
            invalid_classifier = skflow.TensorFlowRNNClassifier(
                rnn_size=5,
                cell_type='invalid_type',
                input_op_fn=input_op_fn,
                n_classes=3)
            invalid_classifier._model_fn(word_ids, labels)

        skflow.TensorFlowRNNRegressor(rnn_size=5,
                                      cell_type='gru',
                                      input_op_fn=input_op_fn,
                                      n_classes=0)
Ejemplo n.º 2
0
    def testRNN(self):
        """Fit RNN classifiers and a regressor on a small numeric data set.

        Covers an LSTM classifier (with a check on its predictions), a
        two-layer ``'rnn'`` classifier, an invalid cell type that must raise
        ``ValueError`` on ``fit``, and a GRU regressor.
        """
        random.seed(42)
        import numpy as np

        features = np.array(
            [[2, 1, 2, 2, 3],
             [2, 2, 3, 4, 5],
             [3, 3, 1, 2, 1],
             [2, 4, 5, 4, 1]],
            dtype=np.float32)
        # Labels for the classification runs.
        class_labels = np.array([1, 0, 1, 0], dtype=np.float32)
        # Targets for the regression run.
        regression_targets = np.array([10, 16, 10, 16], dtype=np.float32)
        # Rows used for prediction; no dtype is given, so NumPy picks its
        # default for integer literals.
        held_out = np.array([[1, 3, 3, 2, 1], [2, 3, 4, 5, 6]])

        def split_columns(X):
            # Split X into 5 pieces along dimension 1 (pre-1.0 tf.split
            # argument order: split_dim, num_split, value).
            return tf.split(1, 5, X)

        # Keyword arguments shared by every classifier below.
        shared = dict(rnn_size=2, n_classes=2, input_op_fn=split_columns)

        # Classification
        lstm_classifier = skflow.TensorFlowRNNClassifier(cell_type='lstm',
                                                         **shared)
        lstm_classifier.fit(features, class_labels)
        # Bare attribute reads kept from the original; presumably they verify
        # that the attributes are available after fit.
        lstm_classifier.weights_
        lstm_classifier.bias_
        predicted_labels = lstm_classifier.predict(held_out)
        self.assertAllClose(predicted_labels, np.array([1, 0]))

        stacked_classifier = skflow.TensorFlowRNNClassifier(cell_type='rnn',
                                                            num_layers=2,
                                                            **shared)
        stacked_classifier.fit(features, class_labels)

        invalid_classifier = skflow.TensorFlowRNNClassifier(
            cell_type='invalid_cell_type',
            num_layers=2,
            **shared)
        with self.assertRaises(ValueError):
            invalid_classifier.fit(features, class_labels)

        # Regression
        regressor = skflow.TensorFlowRNNRegressor(rnn_size=2,
                                                  cell_type='gru',
                                                  input_op_fn=split_columns)
        regressor.fit(features, regression_targets)
        regressor.weights_
        regressor.bias_
        # The result is not asserted on; the call itself is what is exercised.
        predictions = regressor.predict(held_out)
Ejemplo n.º 3
0
    def testRNN(self):
        """Fit an LSTM classifier and an LSTM regressor on a tiny data set.

        The classifier's predictions on two extra rows are asserted. A GRU
        classifier and a two-layer ``'rnn'`` classifier are only constructed.
        The regressor is fitted on the same labels and asked for predictions,
        which are not asserted on.
        """
        random.seed(42)
        import numpy as np

        features = np.array(
            [[2, 1, 2, 2, 3],
             [2, 2, 3, 4, 5],
             [3, 3, 1, 2, 1],
             [2, 4, 5, 4, 1]],
            dtype=np.float32)
        labels = np.array([1, 0, 1, 0], dtype=np.float32)

        def split_columns(X):
            # Split X into 5 pieces along dimension 1 (pre-1.0 tf.split
            # argument order: split_dim, num_split, value).
            return tf.split(1, 5, X)

        # Classification
        lstm_classifier = skflow.TensorFlowRNNClassifier(
            rnn_size=2,
            cell_type='lstm',
            n_classes=2,
            input_op_fn=split_columns)
        lstm_classifier.fit(features, labels)
        class_input = np.array([[1, 3, 3, 2, 1], [2, 3, 4, 5, 6]])
        predicted_labels = lstm_classifier.predict(class_input)
        self.assertAllClose(predicted_labels, np.array([1, 0]))

        # Constructed only; these two estimators are never fitted here.
        skflow.TensorFlowRNNClassifier(rnn_size=2,
                                       cell_type='gru',
                                       n_classes=2,
                                       input_op_fn=split_columns)
        skflow.TensorFlowRNNClassifier(rnn_size=2,
                                       cell_type='rnn',
                                       n_classes=2,
                                       input_op_fn=split_columns,
                                       num_layers=2)

        # Regression
        regressor = skflow.TensorFlowRNNRegressor(rnn_size=2,
                                                  cell_type='lstm',
                                                  input_op_fn=split_columns)
        regressor.fit(features, labels)
        regression_input = np.array([[1, 3, 3, 2, 1], [2, 3, 4, 5, 6]])
        predictions = regressor.predict(regression_input)
### Models

# Size of each word embedding; this example also passes it as the RNN's
# `rnn_size`.
EMBEDDING_SIZE = 50

def input_op_fn(X):
    """Transform batched word indexes X into a list of per-word embeddings.

    An embeddings matrix of [n_words, EMBEDDING_SIZE] is created and the word
    indexes of each sequence are mapped through it, giving
    [batch_size, sequence_length, EMBEDDING_SIZE]. The result is then split
    per word with the document-length dimension removed, so the return value
    is a list of tensors of shape [batch_size, EMBEDDING_SIZE].
    """
    embedded_words = skflow.ops.categorical_variable(
        X,
        n_classes=n_words,
        embedding_size=EMBEDDING_SIZE,
        name='words')
    return skflow.ops.split_squeeze(1, MAX_DOCUMENT_LENGTH, embedded_words)

# Single-layer, single-direction GRU classifier.
classifier = skflow.TensorFlowRNNClassifier(
    rnn_size=EMBEDDING_SIZE,
    n_classes=15,
    cell_type='gru',
    input_op_fn=input_op_fn,
    num_layers=1,
    bidirection=False,
    sequence_length=None,
    steps=1000,
    optimizer='Adam',
    learning_rate=0.01,
    continue_training=True)

# Continuously train (the classifier is configured with steps=1000) and
# report accuracy on the test set after every fit. The loop has no exit
# condition, so the script runs until it is interrupted.
while True:
    classifier.fit(X_train, y_train, logdir='/tmp/tf_examples/word_rnn')
    predicted = classifier.predict(X_test)
    score = metrics.accuracy_score(y_test, predicted)
    print('Accuracy: {0:f}'.format(score))


def input_op_fn(X):
    """Transform input word indexes X into a list of per-word embeddings.

    An embeddings matrix of [n_words, EMBEDDING_SIZE] is created and the word
    indexes of each sequence are mapped through it, giving
    [batch_size, sequence_length, EMBEDDING_SIZE]. The result is then split
    per word with the document-length dimension removed, so the return value
    is a list of tensors of shape [batch_size, EMBEDDING_SIZE].
    """
    embedded_words = skflow.ops.categorical_variable(
        X,
        n_classes=n_words,
        embedding_size=EMBEDDING_SIZE,
        name='words')
    return skflow.ops.split_squeeze(1, MAX_DOCUMENT_LENGTH, embedded_words)


# GRU classifier; every option not listed here keeps the estimator's default.
classifier = skflow.TensorFlowRNNClassifier(
    rnn_size=EMBEDDING_SIZE,
    n_classes=15,
    cell_type='gru',
    input_op_fn=input_op_fn,
    steps=1000,
    optimizer='Adam',
    learning_rate=0.01,
    continue_training=True)

# Continuously train (the classifier is configured with steps=1000) and
# report accuracy on the test set after every fit. The loop has no exit
# condition, so the script runs until it is interrupted.
while True:
    classifier.fit(X_train, y_train, logdir='/tmp/tf_examples/word_rnn')
    predicted = classifier.predict(X_test)
    score = metrics.accuracy_score(y_test, predicted)
    print('Accuracy: {0:f}'.format(score))