from tensorflow.keras.datasets import reuters


def load_data(num_words=10000):
    '''Loads the Reuters dataset and returns the vectorized training and test data.'''
    (train_data, train_labels), (test_data, test_labels) = \
        reuters.load_data(num_words=num_words)
    # vectorize_sequences is the multi-hot encoder defined further down in this section.
    x_train = vectorize_sequences(train_data)
    x_test = vectorize_sequences(test_data)
    return (x_train, train_labels), (x_test, test_labels)
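# A minimal usage sketch, assuming vectorize_sequences has been defined
# (see the encoder below); the default split yields 8982 training and
# 2246 test newswires:
(x_train, train_labels), (x_test, test_labels) = load_data(num_words=10000)
print(x_train.shape)  # (8982, 10000): newswires as multi-hot vectors
print(x_test.shape)   # (2246, 10000)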
# Assumes module-level imports: numpy as np, reuters, Tokenizer, to_categorical.
def __init__(self):
    max_words = 1000
    # The data, split between train and test sets:
    (x_train, y_train), (x_test, y_test) = reuters.load_data(num_words=max_words,
                                                             test_split=0.2)
    num_classes = np.max(y_train) + 1
    # Multi-hot encode the word-index sequences into fixed-size binary vectors.
    self.tokenizer = Tokenizer(num_words=max_words)
    x_train = self.tokenizer.sequences_to_matrix(x_train, mode='binary')
    x_test = self.tokenizer.sequences_to_matrix(x_test, mode='binary')
    # One-hot encode the topic labels.
    y_train = to_categorical(y_train, num_classes)
    y_test = to_categorical(y_test, num_classes)
    super().__init__(x_train, x_test, y_train, y_test, (max_words, ),
                     num_classes, 'reuters')
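# The super().__init__ call above implies a dataset base class not shown in
# the source; a hypothetical sketch of such a parent (the name DatasetWrapper
# and its attribute names are assumptions, not from the source):
class DatasetWrapper:
    def __init__(self, x_train, x_test, y_train, y_test,
                 input_shape, num_classes, name):
        # Stores the prepared splits and metadata for downstream models.
        self.x_train, self.x_test = x_train, x_test
        self.y_train, self.y_test = y_train, y_test
        self.input_shape = input_shape
        self.num_classes = num_classes
        self.name = name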
# Load the dataset
from tensorflow.keras.datasets import reuters

data_path = 'D:\\data\\reuters.npz'
(train_data, train_labels), (test_data, test_labels) = reuters.load_data(path=data_path,
                                                                         num_words=10000)
print(train_data.shape)

# Decode the indices back into newswire text
word_index = reuters.get_word_index()
reverse_word_index = dict([(value, key) for (key, value) in word_index.items()])
# Indices are offset by 3 because 0, 1 and 2 are reserved for
# padding, start-of-sequence and unknown.
decode_newswise = ' '.join(
    [reverse_word_index.get(i - 3, '?') for i in train_data[0]])
# print(decode_newswise)

# Encode the data
import numpy as np

def vectorize_sequences(sequences, dimension=10000):
    results = np.zeros((len(sequences), dimension))
    for i, sequence in enumerate(sequences):
        results[i, sequence] = 1.
    return results

# Vectorize the training data
x_train = vectorize_sequences(train_data)
# Vectorize the test data
x_test = vectorize_sequences(test_data)

# One-hot encode the labels (46 topic classes)
def to_one_hot(labels, dimension=46):
    results = np.zeros((len(labels), dimension))
    for i, label in enumerate(labels):
        results[i, label] = 1.
    return results
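# For reference, to_one_hot reproduces Keras' built-in utility; both calls
# below produce the same (len(labels), 46) matrix:
from tensorflow.keras.utils import to_categorical

one_hot_train_labels = to_one_hot(train_labels)
one_hot_train_labels = to_categorical(train_labels, num_classes=46)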
from tensorflow.keras.datasets import reuters

(train_data, train_labels), (test_data, test_labels) = reuters.load_data(num_words=10000)
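# A quick sanity check of what load_data returns: ragged lists of word
# indices and integer topic labels in the range 0..45.
print(len(train_data), len(test_data))         # 8982 2246
print(train_data[0][:10])                      # first ten word indices of one newswire
print(train_labels.min(), train_labels.max())  # 0 45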
# Text output in TensorBoard

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.datasets import cifar100
from tensorflow.keras.datasets import reuters

# Load the Reuters dataset
INDEX_FROM = 3
START_CHAR = 1
(x_train, y_train), (x_test, y_test) = reuters.load_data(path="reuters.npz",
                                                         num_words=None,
                                                         skip_top=0,
                                                         maxlen=None,
                                                         test_split=0.2,
                                                         seed=113,
                                                         start_char=START_CHAR,
                                                         oov_char=2,
                                                         index_from=INDEX_FROM)

# Mapping from id to word
word_index = reuters.get_word_index(path="reuters_word_index.json")
word_index = {k: (v + INDEX_FROM) for k, v in word_index.items()}
word_index["<PAD>"] = 0
word_index["<START>"] = START_CHAR  # 1
word_index["<UNK>"] = 2
id_to_word = {value: key for key, value in word_index.items()}

# Function that returns a Reuters newswire as a string
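# The snippet ends where that function would begin; a minimal sketch of it,
# assuming the name decode_newswire (not from the source) and the id_to_word
# mapping built above:
def decode_newswire(sequence):
    # Ids missing from the mapping fall back to the <UNK> token.
    return ' '.join(id_to_word.get(i, "<UNK>") for i in sequence)

print(decode_newswire(x_train[0]))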