def testIrisRNN(self):
    """Declare RNN estimators over a tiny text corpus.

    Six short sentences are run through a ``VocabularyProcessor`` and the
    resulting array is used as model input. Classifiers for the 'gru',
    'rnn' and 'lstm' cell types are only constructed; for an invalid cell
    type, constructing the classifier and calling ``_model_fn`` is
    expected to raise ``ValueError``. A GRU regressor is declared last.
    """
    import numpy as np

    MAX_DOCUMENT_LENGTH = 6
    EMBEDDING_SIZE = 10

    sentences = [
        "I can do this",
        "I believe myself",
        "I am okay",
        "Not good, man",
        "Bad mood today",
        "Feeling sick now",
    ]
    labels = [1, 1, 1, 0, 0, 0]

    vocab_processor = skflow.preprocessing.VocabularyProcessor(
        MAX_DOCUMENT_LENGTH)
    # fit_transform's result is materialised with list() before being
    # handed to np.array.
    data = np.array(list(vocab_processor.fit_transform(sentences)))
    n_words = len(vocab_processor.vocabulary_)

    def input_op_fn(X):
        """Embed X and split it into MAX_DOCUMENT_LENGTH pieces on dim 1."""
        embedded = skflow.ops.categorical_variable(
            X, n_classes=n_words, embedding_size=EMBEDDING_SIZE,
            name='words')
        return skflow.ops.split_squeeze(1, MAX_DOCUMENT_LENGTH, embedded)

    random.seed(42)

    # Arguments shared by every classifier declared below.
    classifier_kwargs = dict(rnn_size=5, input_op_fn=input_op_fn, n_classes=3)

    # Only declare them for now
    # TODO: Add test case once we have data set in the repo
    for cell_type in ('gru', 'rnn', 'lstm'):
        classifier = skflow.TensorFlowRNNClassifier(
            cell_type=cell_type, **classifier_kwargs)

    # An unsupported cell type must be rejected with ValueError.
    with self.assertRaises(ValueError):
        classifier = skflow.TensorFlowRNNClassifier(
            cell_type='invalid_type', **classifier_kwargs)
        classifier._model_fn(data, labels)

    classifier = skflow.TensorFlowRNNRegressor(
        rnn_size=5, cell_type='gru', input_op_fn=input_op_fn, n_classes=0)
def testRNN(self):
    """Fit RNN classifiers and a regressor on a small fixed data set.

    Covers, in order:
      * an LSTM classifier: fit, access to ``weights_`` / ``bias_``, and
        a prediction check against ``[1, 0]``;
      * a two-layer ``cell_type='rnn'`` classifier: fit only;
      * an invalid cell type: ``fit`` is expected to raise ``ValueError``;
      * a GRU regressor: fit, access to ``weights_`` / ``bias_``, predict.
    """
    import numpy as np

    random.seed(42)

    data = np.array([[2, 1, 2, 2, 3],
                     [2, 2, 3, 4, 5],
                     [3, 3, 1, 2, 1],
                     [2, 4, 5, 4, 1]], dtype=np.float32)
    # labels for classification
    labels = np.array([1, 0, 1, 0], dtype=np.float32)
    # targets for regression
    targets = np.array([10, 16, 10, 16], dtype=np.float32)
    test_data = np.array([[1, 3, 3, 2, 1], [2, 3, 4, 5, 6]])

    def input_fn(X):
        """Split X into 5 pieces along dimension 1."""
        # NOTE(review): uses the legacy tf.split(split_dim, num_split, value)
        # argument order - confirm against the TensorFlow version in use.
        return tf.split(1, 5, X)

    # Classification
    classifier = skflow.TensorFlowRNNClassifier(
        rnn_size=2, cell_type='lstm', n_classes=2, input_op_fn=input_fn)
    classifier.fit(data, labels)
    # Explicit checks instead of bare attribute expressions, so the intent
    # (the fitted parameters must be retrievable) is visible.
    self.assertIsNotNone(classifier.weights_)
    self.assertIsNotNone(classifier.bias_)
    predictions = classifier.predict(test_data)
    self.assertAllClose(predictions, np.array([1, 0]))

    classifier = skflow.TensorFlowRNNClassifier(
        rnn_size=2, cell_type='rnn', n_classes=2, input_op_fn=input_fn,
        num_layers=2)
    classifier.fit(data, labels)

    # Construction happens outside the assertRaises block: only fit() is
    # expected to raise for the invalid cell type.
    classifier = skflow.TensorFlowRNNClassifier(
        rnn_size=2, cell_type='invalid_cell_type', n_classes=2,
        input_op_fn=input_fn, num_layers=2)
    with self.assertRaises(ValueError):
        classifier.fit(data, labels)

    # Regression
    regressor = skflow.TensorFlowRNNRegressor(
        rnn_size=2, cell_type='gru', input_op_fn=input_fn)
    regressor.fit(data, targets)
    self.assertIsNotNone(regressor.weights_)
    self.assertIsNotNone(regressor.bias_)
    # Smoke check only: there are no reference values to compare against.
    regressor.predict(test_data)
def testRNN(self):
    """Fit an LSTM classifier and an LSTM regressor on a tiny data set.

    The LSTM classifier is fitted and its predictions on two test rows
    are compared with ``[1, 0]``. GRU and two-layer 'rnn' classifiers are
    only constructed. The regressor is fitted on the same data and labels
    and ``predict`` is called without checking its output.
    """
    import numpy as np

    random.seed(42)

    data = np.array([[2, 1, 2, 2, 3],
                     [2, 2, 3, 4, 5],
                     [3, 3, 1, 2, 1],
                     [2, 4, 5, 4, 1]], dtype=np.float32)
    labels = np.array([1, 0, 1, 0], dtype=np.float32)
    # Shared by the classification and regression predict calls.
    test_data = np.array([[1, 3, 3, 2, 1], [2, 3, 4, 5, 6]])

    def input_fn(X):
        """Split X into 5 pieces along dimension 1."""
        # NOTE(review): uses the legacy tf.split(split_dim, num_split, value)
        # argument order - confirm against the TensorFlow version in use.
        return tf.split(1, 5, X)

    # Classification
    classifier = skflow.TensorFlowRNNClassifier(
        rnn_size=2, cell_type='lstm', n_classes=2, input_op_fn=input_fn)
    classifier.fit(data, labels)
    predictions = classifier.predict(test_data)
    self.assertAllClose(predictions, np.array([1, 0]))

    # Declared only; these two estimators are never fitted here.
    classifier = skflow.TensorFlowRNNClassifier(
        rnn_size=2, cell_type='gru', n_classes=2, input_op_fn=input_fn)
    classifier = skflow.TensorFlowRNNClassifier(
        rnn_size=2, cell_type='rnn', n_classes=2, input_op_fn=input_fn,
        num_layers=2)

    # Regression
    regressor = skflow.TensorFlowRNNRegressor(
        rnn_size=2, cell_type='lstm', input_op_fn=input_fn)
    regressor.fit(data, labels)
    # Smoke check only: there are no reference values to compare against.
    regressor.predict(test_data)
### Models

EMBEDDING_SIZE = 50


def input_op_fn(X):
    """Turn batched word indexes X into a list of per-word embeddings."""
    # Look up an embedding for every word index. Per the original notes,
    # this creates an embeddings matrix of [n_words, EMBEDDING_SIZE] and
    # maps the sequence to [batch_size, sequence_length, EMBEDDING_SIZE].
    embedded = skflow.ops.categorical_variable(
        X,
        n_classes=n_words,
        embedding_size=EMBEDDING_SIZE,
        name='words',
    )
    # Split into one tensor per word while dropping the document-length
    # dimension; each entry is noted as [batch_size, EMBEDDING_SIZE].
    return skflow.ops.split_squeeze(1, MAX_DOCUMENT_LENGTH, embedded)


# Single direction GRU with a single layer
classifier = skflow.TensorFlowRNNClassifier(
    rnn_size=EMBEDDING_SIZE,
    n_classes=15,
    cell_type='gru',
    input_op_fn=input_op_fn,
    num_layers=1,
    bidirection=False,
    sequence_length=None,
    steps=1000,
    optimizer='Adam',
    learning_rate=0.01,
    continue_training=True,
)

# Continuously train for 1000 steps & predict on test set.
# The loop never exits on its own; each pass fits again and reports the
# accuracy on the test set.
while True:
    classifier.fit(X_train, y_train, logdir='/tmp/tf_examples/word_rnn')
    score = metrics.accuracy_score(y_test, classifier.predict(X_test))
    print('Accuracy: {0:f}'.format(score))
# Customized function to transform input X into embeddings
def input_op_fn(X):
    """Turn word indexes X into a list of per-word embeddings."""
    # Look up an embedding for every word index. Per the original notes,
    # this creates an embeddings matrix of [n_words, EMBEDDING_SIZE] and
    # maps the sequence to [batch_size, sequence_length, EMBEDDING_SIZE].
    embedded = skflow.ops.categorical_variable(
        X,
        n_classes=n_words,
        embedding_size=EMBEDDING_SIZE,
        name='words',
    )
    # Split into one tensor per word while dropping the document-length
    # dimension; each entry is noted as [batch_size, EMBEDDING_SIZE].
    return skflow.ops.split_squeeze(1, MAX_DOCUMENT_LENGTH, embedded)


classifier = skflow.TensorFlowRNNClassifier(
    rnn_size=EMBEDDING_SIZE,
    n_classes=15,
    cell_type='gru',
    input_op_fn=input_op_fn,
    steps=1000,
    optimizer='Adam',
    learning_rate=0.01,
    continue_training=True,
)

# Continuously train for 1000 steps & predict on test set.
# The loop never exits on its own; each pass fits again and reports the
# accuracy on the test set.
while True:
    classifier.fit(X_train, y_train, logdir='/tmp/tf_examples/word_rnn')
    score = metrics.accuracy_score(y_test, classifier.predict(X_test))
    print('Accuracy: {0:f}'.format(score))