def test_naive_model(model: MultinomialNB, vocabulary: CountVectorizer):
    """Evaluate a naive Bayes model on the sentiment140 test split.

    The test texts are vectorised with ``vocabulary`` and scored with
    ``model.score``. The timed section covers both the vectorisation and
    the scoring; loading the data is excluded.

    Args:
        model: Classifier whose ``score(features, labels)`` is called.
        vocabulary: Vectoriser whose ``transform`` turns texts into features.

    Returns:
        A ``(accuracy, elapsed_seconds)`` tuple, where ``accuracy`` is
        whatever ``model.score`` returned.
    """
    # Only the third split from the loader is needed; the first two are dropped.
    _train, _val, (texts, labels) = sentiment_140.load_data()

    began = time.time()
    features = vocabulary.transform(texts)
    accuracy = model.score(features, labels)
    elapsed = time.time() - began

    return accuracy, elapsed
def test_keras_model(model: Sequential, tokenizer, arguments):
    """Evaluate a Keras model on the sentiment140 test split.

    The test texts are converted to integer sequences, padded, and passed
    to ``model.evaluate``. The timed section covers tokenising, padding and
    evaluation; loading the data is excluded.

    Args:
        model: Model whose ``evaluate(x, y)`` returns two values, unpacked
            here as ``(score, acc)``.
        tokenizer: Object providing ``texts_to_sequences``.
        arguments: Mapping read via ``arguments["maxlen"]``. A falsy value
            means no fixed length was requested.

    Returns:
        A ``(score, acc, elapsed_seconds)`` tuple.
    """
    # pad_sequences treats None (not 0) as "pad to the longest sequence".
    # A target length of 0 would allocate a zero-width array and fail as
    # soon as a non-empty sequence is copied into it.
    maxlen = arguments["maxlen"] or None

    # Only the third split from the loader is needed; the first two are dropped.
    _, _, (x_test, y_test) = sentiment_140.load_data()

    starttime = time.time()
    x_test = tokenizer.texts_to_sequences(x_test)
    x_test = sequence.pad_sequences(x_test, maxlen=maxlen)
    score, acc = model.evaluate(x_test, y_test)
    endtime = time.time()

    return score, acc, endtime - starttime
# Convolution layer hyperparameters
kernel_size, filters, pool_size = 5, 64, 4

# Recurrent layer hyperparameters
rnn_output_size = 70

# Training hyperparameters
batch_size, epochs = 512, 5

print('Loading data...')
(x_train, y_train), (x_val, y_val), (x_test, y_test) = sentiment_140.load_data()

# NOTE(review): the tokenizer is fitted on all three splits, including the
# test texts -- confirm this is intended.
print('Fitting tokenizer...')
tokenizer = Tokenizer()
tokenizer.fit_on_texts(np.concatenate([x_train, x_val, x_test]))

# Each split is rebound in turn so the raw texts can be released as soon as
# their integer-sequence form exists.
print('Convert text to sequences')
x_train = tokenizer.texts_to_sequences(x_train)
x_val = tokenizer.texts_to_sequences(x_val)
x_test = tokenizer.texts_to_sequences(x_test)

print(len(x_train), 'train sequences')
print(len(x_val), 'validation sequences')
print(len(x_test), 'test sequences')

print('Pad sequences (samples x time)')
# Convolution layer hyperparameters
kernel_size, filters, pool_size = 5, 96, 4

# Recurrent layer hyperparameters
rnn_output_size = 70

# Training hyperparameters
batch_size, epochs = 512, 5

print('Loading data...')
(x_train, y_train), (x_val, y_val), (x_test, y_test) = sentiment_140.load_data()

# NOTE(review): the tokenizer is fitted on all three splits, including the
# test texts -- confirm this is intended.
print('Fitting tokenizer...')
tokenizer = Tokenizer()
tokenizer.fit_on_texts(np.concatenate([x_train, x_val, x_test]))

# Each split is rebound in turn so the raw texts can be released as soon as
# their integer-sequence form exists.
print('Convert text to sequences')
x_train = tokenizer.texts_to_sequences(x_train)
x_val = tokenizer.texts_to_sequences(x_val)
x_test = tokenizer.texts_to_sequences(x_test)

# `maxlen` is not set in the lines shown here; presumably it is defined
# earlier in the script -- verify.
print('Pad sequences (samples x time)')
x_train = sequence.pad_sequences(x_train, maxlen=maxlen)
x_val = sequence.pad_sequences(x_val, maxlen=maxlen)
x_test = sequence.pad_sequences(x_test, maxlen=maxlen)