Example 1
# Assumes module-level imports of imdb and sequence (tf.keras) and the
# constants max_features (vocabulary size) and maxlen (padded length).
def load_data():
    """Load the IMDB dataset and pad every review to maxlen tokens."""
    print('Loading data...')
    (x_train, y_train), (x_test,
                         y_test) = imdb.load_data(num_words=max_features)
    print(len(x_train), 'train sequences')
    print(len(x_test), 'test sequences')

    print('Pad sequences (samples x time)')
    x_train = sequence.pad_sequences(x_train, maxlen=maxlen)
    x_test = sequence.pad_sequences(x_test, maxlen=maxlen)
    print('x_train shape:', x_train.shape)
    print('x_test shape:', x_test.shape)
    return [x_train, y_train, x_test, y_test]
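Since the helper returns a flat list, a caller unpacks it positionally:

x_train, y_train, x_test, y_test = load_data()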
Example 2
def main(unused_argv):
  # Loading the data
  # data from: https://keras.io/datasets/
  # Dataset of 25,000 movie reviews from IMDB, labeled by sentiment
  # (positive/negative).
  # Reviews have been preprocessed, and each review is encoded as a sequence
  # of word indexes (integers).
  # For convenience, words are indexed by overall frequency in the dataset.

  print('Loading data...')
  (x_train, y_train), (x_test, y_test) = imdb.load_data(
      num_words=FLAGS.num_words)

  print('size of the train dataset:', x_train.shape[0])
  print('size of the test dataset:', x_test.shape[0])

  # run experiment
  run_config = tf.contrib.learn.RunConfig(model_dir=FLAGS.model_dir)
  learn_runner.run(generate_experiment_fn(x_train, y_train, x_test, y_test),
                   run_config=run_config)
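generate_experiment_fn is not shown in this example. Under the TF 1.x
tf.contrib.learn Experiment API, and assuming the same module-level imdb,
sequence, and tf imports, a minimal sketch could look as follows; the
padding length and the stand-in linear estimator are assumptions for
illustration, not the original model:

def generate_experiment_fn(x_train, y_train, x_test, y_test):
  # Hypothetical sketch: pad the reviews, then return an experiment_fn
  # that learn_runner.run() can call with (run_config, hparams).
  x_train = sequence.pad_sequences(x_train, maxlen=100).astype('float32')
  x_test = sequence.pad_sequences(x_test, maxlen=100).astype('float32')

  def experiment_fn(run_config, hparams):
    del hparams  # unused in this sketch
    # stand-in estimator; the original presumably builds an RNN here
    estimator = tf.estimator.LinearClassifier(
        feature_columns=[tf.feature_column.numeric_column('x', shape=[100])],
        config=run_config)
    train_input = tf.estimator.inputs.numpy_input_fn(
        x={'x': x_train}, y=y_train, shuffle=True, num_epochs=None)
    eval_input = tf.estimator.inputs.numpy_input_fn(
        x={'x': x_test}, y=y_test, shuffle=False, num_epochs=1)
    return tf.contrib.learn.Experiment(estimator=estimator,
                                       train_input_fn=train_input,
                                       eval_input_fn=eval_input)

  return experiment_fn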
Example 3
def main(unused_argv):
    # Loading the data
    # data from: https://keras.io/datasets/
    # Dataset of 25,000 movie reviews from IMDB, labeled by sentiment
    # (positive/negative).
    # Reviews have been preprocessed, and each review is encoded as a sequence
    # of word indexes (integers).
    # For convenience, words are indexed by overall frequency in the dataset.
    print('Loading data...')

    (x_train, y_train), (x_test,
                         y_test) = imdb.load_data(num_words=FLAGS.num_words)

    print('size of the train dataset:', x_train.shape[0])
    print('size of the test dataset:', x_test.shape[0])

    # run config
    run_config = tf.estimator.RunConfig()

    # create the estimator (canned contrib RNN or custom model_fn)
    if FLAGS.use_canned_estimator:
        xc = tf.contrib.layers.sparse_column_with_integerized_feature(
            'x', FLAGS.num_words)
        xc = tf.contrib.layers.embedding_column(xc, FLAGS.embed_dim)

        # creates estimator
        estimator = tf.contrib.learn.DynamicRnnEstimator(
            config=run_config,
            model_dir=FLAGS.model_dir,
            problem_type=constants.ProblemType.CLASSIFICATION,
            prediction_type=PredictionType.SINGLE_VALUE,
            sequence_feature_columns=[xc],
            context_feature_columns=None,
            num_units=FLAGS.num_rnn_units,
            cell_type=FLAGS.cell_type,
            optimizer=FLAGS.optimizer,
            learning_rate=FLAGS.learning_rate,
            num_classes=FLAGS.num_classes,
            dropout_keep_probabilities=FLAGS.dropout_keep_probabilities)

    else:
        model_fn = CustomRNNEstimator(rnn_cell_sizes=FLAGS.num_rnn_units,
                                      label_dimension=FLAGS.num_classes,
                                      num_words=FLAGS.num_words,
                                      dnn_layer_sizes=FLAGS.num_dnn_units,
                                      optimizer=FLAGS.optimizer,
                                      learning_rate=FLAGS.learning_rate,
                                      embed_dim=FLAGS.embed_dim)
        estimator = tf.estimator.Estimator(model_fn=model_fn,
                                           model_dir=FLAGS.model_dir,
                                           config=run_config)

    # input functions
    train_input = get_input_fn(x_train,
                               y_train,
                               FLAGS.train_batch_size,
                               epochs=FLAGS.num_epochs,
                               max_length=FLAGS.max_len,
                               batch_by_seq_len=FLAGS.batch_by_seq_len)

    test_input = get_input_fn(x_test,
                              y_test,
                              FLAGS.eval_batch_size,
                              epochs=1,
                              max_length=FLAGS.max_len)

    # training and evaluation are left commented out here; predict() below
    # restores the latest checkpoint from FLAGS.model_dir
    # estimator.train(input_fn=train_input)
    # estimator.evaluate(input_fn=test_input)

    # predict
    predictions = list(estimator.predict(input_fn=test_input))

    # loading map from word to index and index to word
    word_to_index, index_to_word = _load_map_dicts()

    for i in range(5):
        # choose a random index in the test dataset; randint is inclusive
        # at both ends, so subtract 1 to stay in range
        index = random.randint(0, x_test.shape[0] - 1)
        print(_ids_to_sentence(x_test[index], index_to_word))
        print('Prediction:', predictions[index])
        print('Label:', y_test[index])
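_load_map_dicts and _ids_to_sentence are project helpers not shown here. A
minimal sketch built on Keras' own word index might look like this (it
glosses over the index_from offset that imdb.load_data applies):

def _load_map_dicts():
    # imdb.get_word_index() maps each word to its frequency rank
    word_to_index = imdb.get_word_index()
    index_to_word = {index: word for word, index in word_to_index.items()}
    return word_to_index, index_to_word

def _ids_to_sentence(ids, index_to_word):
    # skip padding (id 0) and fall back to '<unk>' for unknown ids
    return ' '.join(index_to_word.get(i, '<unk>') for i in ids if i != 0)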
Example 4
from tensorflow.contrib.keras.python.keras.datasets import imdb
from tensorflow.contrib.keras.python.keras.layers import Embedding, SimpleRNN, Dropout, Dense, Activation, LSTM, GRU
from tensorflow.contrib.keras.python.keras.models import Sequential
from tensorflow.contrib.keras.python.keras.preprocessing import sequence

max_features = 20000
maxlen = 100
batch_size = 32


(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=max_features)

X_train = sequence.pad_sequences(X_train, maxlen=maxlen)
X_test = sequence.pad_sequences(X_test, maxlen=maxlen)
print('X_train shape:', X_train.shape)
print('X_test shape:', X_test.shape)


model = Sequential()
model.add(Embedding(max_features, 128, input_length=maxlen))

#model.add(SimpleRNN(128))
#model.add(GRU(128))
model.add(LSTM(128))

model.add(Dropout(0.5))
model.add(Dense(1))
model.add(Activation('sigmoid'))


model.compile(loss='binary_crossentropy', optimizer='adam')
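The snippet compiles the model but stops before training; a typical
follow-up, with an assumed epoch count, would be:

model.fit(X_train, y_train,
          batch_size=batch_size,
          epochs=3,  # assumed; tune as needed
          validation_data=(X_test, y_test))
score = model.evaluate(X_test, y_test, batch_size=batch_size)
print('Test loss:', score)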
Example 5
# Global params:
NB_WORDS = 20000
SKIP_TOP = 0
TEST_SPLIT = 0.2
INIT_SEED = 2017
GLOBAL_SEED = 2018
MAXLEN = 80
BATCH_SIZE = 128
TEST_BATCH_SIZE = 512


(X_train, Y_train), (X_test, Y_test) = imdb.load_data(num_words=NB_WORDS)
print(len(X_train), 'train sequences')
print(len(X_test), 'test sequences')
print('Pad sequences (samples x time)')
X_train = sequence.pad_sequences(X_train, maxlen=MAXLEN)
X_test = sequence.pad_sequences(X_test, maxlen=MAXLEN)
print('X_train shape:', X_train.shape)
print('X_test shape:', X_test.shape)


# Assumes PyTorch imports: import torch; import torch.nn as nn
class Model(nn.Module):
    def __init__(self, nb_words, hidden_size=128, embedding_size=128, n_layers=1,
                 wdrop=0.25, odrop=0.25, edrop=0.1, idrop=0.25, variational=False,
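The class definition is cut off above. Purely as an illustration, a minimal
PyTorch module with the same embedding/LSTM hyperparameters might look like
the following sketch (the wdrop/odrop/edrop/idrop variational-dropout
options from the original signature are omitted):

import torch
import torch.nn as nn

class LSTMSentimentSketch(nn.Module):
    # hypothetical stand-in: embedding -> LSTM -> linear sigmoid head
    def __init__(self, nb_words, hidden_size=128, embedding_size=128,
                 n_layers=1):
        super().__init__()
        self.embedding = nn.Embedding(nb_words, embedding_size)
        self.lstm = nn.LSTM(embedding_size, hidden_size,
                            num_layers=n_layers, batch_first=True)
        self.head = nn.Linear(hidden_size, 1)

    def forward(self, x):
        emb = self.embedding(x)         # (batch, seq_len, embedding_size)
        out, _ = self.lstm(emb)         # (batch, seq_len, hidden_size)
        logits = self.head(out[:, -1])  # last time step only
        return torch.sigmoid(logits).squeeze(-1)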
Example 6
def main(unused_argv):

    # loading data
    (x_train, y_train), (x_test,
                         y_test) = imdb.load_data(maxlen=FLAGS.max_len,
                                                  num_words=FLAGS.num_words,
                                                  index_from=FLAGS.index_from)

    # loading map from word to index and index to word
    word_to_index, index_to_word = _load_map_dicts()

    # run config
    run_config = tf.estimator.RunConfig()

    # create the estimator (canned contrib RNN or custom model_fn)
    if FLAGS.use_canned_estimator:
        xc = tf.contrib.layers.sparse_column_with_integerized_feature(
            'x', FLAGS.num_words)
        xc = tf.contrib.layers.embedding_column(xc, FLAGS.embed_dim)

        # creates estimator
        estimator = tf.contrib.learn.DynamicRnnEstimator(
            config=run_config,
            model_dir=FLAGS.model_dir,
            problem_type=constants.ProblemType.CLASSIFICATION,
            prediction_type=PredictionType.SINGLE_VALUE,
            sequence_feature_columns=[xc],
            context_feature_columns=None,
            num_units=FLAGS.num_rnn_units,
            cell_type=FLAGS.cell_type,
            optimizer=FLAGS.optimizer,
            learning_rate=FLAGS.learning_rate,
            num_classes=FLAGS.num_classes,
            dropout_keep_probabilities=FLAGS.dropout_keep_probabilities)

    else:
        model_fn = CustomRNNEstimator(rnn_cell_sizes=FLAGS.num_rnn_units,
                                      label_dimension=FLAGS.num_classes,
                                      num_words=FLAGS.num_words,
                                      dnn_layer_sizes=FLAGS.num_dnn_units,
                                      optimizer=FLAGS.optimizer,
                                      learning_rate=FLAGS.learning_rate,
                                      embed_dim=FLAGS.embed_dim)
        estimator = tf.estimator.Estimator(model_fn=model_fn,
                                           model_dir=FLAGS.model_dir,
                                           config=run_config)

    # test input function (single pass, no shuffling)
    test_input = get_input_fn(x_test,
                              y_test,
                              FLAGS.eval_batch_size,
                              epochs=1,
                              max_length=FLAGS.max_len,
                              shuffle=False)

    predictions = list(estimator.predict(input_fn=test_input))

    for i in range(5):
        print(_ids_to_sentence(x_test[i], index_to_word))
        print('Prediction:', predictions[i])
        print('Label:', y_test[i])
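get_input_fn is defined elsewhere in this project. A minimal TF 1.x sketch,
ignoring the batch_by_seq_len bucketing option seen in Example 3 and
assuming numpy and the Keras sequence module are imported, could be:

def get_input_fn(x, y, batch_size, epochs, max_length, shuffle=True):
    # hypothetical sketch: pad every review to max_length and wrap the
    # arrays with the TF 1.x numpy_input_fn helper under feature key 'x'
    x_padded = sequence.pad_sequences(x, maxlen=max_length)
    return tf.estimator.inputs.numpy_input_fn(
        x={'x': x_padded},
        y=np.asarray(y),
        batch_size=batch_size,
        num_epochs=epochs,
        shuffle=shuffle)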