import os
import subprocess

import wandb
import imdb  # local helper module from the tutorial that loads the raw IMDB reviews
from keras.preprocessing import text, sequence

wandb.init()
config = wandb.config

# set parameters:
config.vocab_size = 1000
config.maxlen = 300
config.batch_size = 32
config.embedding_dims = 50
config.filters = 250
config.kernel_size = 3
config.hidden_dims = 100
config.epochs = 10

(X_train, y_train), (X_test, y_test) = imdb.load_imdb()

# Download and unpack the pre-trained GloVe vectors if they are not already present
if not os.path.exists("glove.6B.100d.txt"):
    print("Downloading glove embeddings...")
    subprocess.check_output(
        "curl -OL http://nlp.stanford.edu/data/glove.6B.zip && unzip glove.6B.zip",
        shell=True)

print("Tokenizing input...")
tokenizer = text.Tokenizer(num_words=config.vocab_size)
tokenizer.fit_on_texts(X_train)
X_train = tokenizer.texts_to_sequences(X_train)
X_test = tokenizer.texts_to_sequences(X_test)

# Pad/truncate every review to a fixed length
X_train = sequence.pad_sequences(X_train, maxlen=config.maxlen)
X_test = sequence.pad_sequences(X_test, maxlen=config.maxlen)
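The script above only downloads glove.6B.100d.txt; it does not yet show how the vectors are consumed. As a rough illustration, here is a minimal, hypothetical sketch (the helper name and its use are assumptions, not part of the original script) of how the file could be parsed into an embedding matrix aligned with the tokenizer's vocabulary. Note that glove.6B.100d.txt contains 100-dimensional vectors, so the matrix width must match the file rather than config.embedding_dims = 50.

import numpy as np

def load_glove_embedding_matrix(path, tokenizer, vocab_size, embedding_dims):
    # Hypothetical helper: read "word v1 v2 ... vN" lines into a dict,
    # then copy each known word's vector into row i of the matrix,
    # where i is the tokenizer's integer index for that word.
    embeddings_index = {}
    with open(path, encoding="utf-8") as f:
        for line in f:
            values = line.split()
            embeddings_index[values[0]] = np.asarray(values[1:], dtype="float32")
    embedding_matrix = np.zeros((vocab_size, embedding_dims))
    for word, i in tokenizer.word_index.items():
        if i < vocab_size and word in embeddings_index:
            embedding_matrix[i] = embeddings_index[word]
    return embedding_matrix

# Example usage (assumed, not from the original tutorial):
# embedding_matrix = load_glove_embedding_matrix(
#     "glove.6B.100d.txt", tokenizer, config.vocab_size, 100)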
import wandb
import imdb  # local helper module from the tutorial that loads the raw IMDB reviews
from keras.preprocessing import text, sequence
from keras.models import Sequential
from keras.layers import Embedding

wandb.init()
config = wandb.config

# set parameters:
config.vocab_size = 1000
config.maxlen = 1000
config.batch_size = 32
config.embedding_dims = 50
config.filters = 250
config.kernel_size = 3
config.hidden_dims = 250
config.epochs = 10

(X_train, y_train), (X_test, y_test) = imdb.load_imdb()
print("Review", X_train[0])
print("Label", y_train[0])

# Map each review to a sequence of integer word indices
tokenizer = text.Tokenizer(num_words=config.vocab_size)
tokenizer.fit_on_texts(X_train)
X_train = tokenizer.texts_to_sequences(X_train)
X_test = tokenizer.texts_to_sequences(X_test)

# Pad/truncate every review to a fixed length
X_train = sequence.pad_sequences(X_train, maxlen=config.maxlen)
X_test = sequence.pad_sequences(X_test, maxlen=config.maxlen)
print(X_train.shape)
print("After pre-processing", X_train[0])

model = Sequential()
model.add(Embedding(config.vocab_size,