Example #1
# Assumes the tf.keras bundled dataset module:
from tensorflow.keras.datasets import reuters
def load_data(num_words=10000):
    '''Loads the Reuters dataset and returns the vectorized training and test data.
    '''
    (train_data, train_labels), (test_data, test_labels) =\
            reuters.load_data(num_words=num_words)
    x_train = vectorize_sequences(train_data)
    x_test = vectorize_sequences(test_data)
    return (x_train, train_labels), (x_test, test_labels)
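The load_data function above calls a vectorize_sequences helper that this snippet does not define. A minimal sketch of that helper, assuming the same multi-hot encoding used in Examples #3 and #4 below:

import numpy as np

def vectorize_sequences(sequences, dimension=10000):
    # Multi-hot encode: one row per sequence, 1.0 at every word index it contains.
    results = np.zeros((len(sequences), dimension))
    for i, sequence in enumerate(sequences):
        results[i, sequence] = 1.
    return results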
Example #2
# Assumed imports for the method below (the enclosing class is not shown):
import numpy as np
from tensorflow.keras.datasets import reuters
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.utils import to_categorical
    def __init__(self):
        max_words = 1000

        # The data, split between train and test sets:
        (x_train, y_train), (x_test,
                             y_test) = reuters.load_data(num_words=max_words,
                                                         test_split=0.2)
        num_classes = np.max(y_train) + 1
        self.tokenizer = Tokenizer(num_words=max_words)
        x_train = self.tokenizer.sequences_to_matrix(x_train, mode='binary')
        x_test = self.tokenizer.sequences_to_matrix(x_test, mode='binary')
        y_train = to_categorical(y_train, num_classes)
        y_test = to_categorical(y_test, num_classes)

        super().__init__(x_train, x_test, y_train, y_test, (max_words, ),
                         num_classes, 'reuters')
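The constructor leaves behind multi-hot input vectors of width max_words and one-hot labels, which any dense softmax classifier can consume. A minimal sketch of such a model using the standard tf.keras Sequential API; the model itself is illustrative and not part of the original class:

from tensorflow.keras import layers, models

model = models.Sequential([
    layers.Dense(512, activation='relu', input_shape=(1000,)),  # width = max_words
    layers.Dense(46, activation='softmax'),  # 46 Reuters topic classes
])
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])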
Example #3
# Load the dataset
from tensorflow.python.keras.datasets import reuters
data_path = 'D:\\data\\reuters.npz'
(train_data, train_labels), (test_data,
                             test_labels) = reuters.load_data(path=data_path,
                                                              num_words=10000)
print(train_data.shape)
# Decode the indices back into newswire text
word_index = reuters.get_word_index()
reverse_word_index = dict([(value, key)
                           for (key, value) in word_index.items()])
# Offset by 3 because indices 0-2 are reserved for padding, start, and unknown.
decode_newswise = ' '.join(
    [reverse_word_index.get(i - 3, '?') for i in train_data[0]])
# print(decode_newswise)

# Encode the data
import numpy as ny


def vectorize_sequences(sequences, dimension=10000):
    # Multi-hot encode: one row per sequence, 1.0 at every word index it contains.
    results = ny.zeros((len(sequences), dimension))
    for i, sequence in enumerate(sequences):
        results[i, sequence] = 1.
    return results


# Vectorize the training data
x_train = vectorize_sequences(train_data)
# Vectorize the test data
x_test = vectorize_sequences(test_data)
Example #4
# Load the dataset
from tensorflow.python.keras.datasets import reuters

data_path = 'D:\\data\\reuters.npz'
(train_data, train_labels), (test_data, test_labels) = reuters.load_data(
    path=data_path, num_words=10000)
print(train_data.shape)

# Decode the indices back into newswire text
word_index = reuters.get_word_index()
reverse_word_index = dict([(value, key) for (key, value) in word_index.items()])
decode_newswise = ' '.join(
    [reverse_word_index.get(i - 3, '?') for i in train_data[0]])
# print(decode_newswise)

# Encode the data
import numpy as ny


def vectorize_sequences(sequences, dimension=10000):
    results = ny.zeros((len(sequences), dimension))
    for i, sequence in enumerate(sequences):
        results[i, sequence] = 1.
    return results


# Vectorize the training data
x_train = vectorize_sequences(train_data)
# Vectorize the test data
x_test = vectorize_sequences(test_data)


# One-hot encode the labels
def to_one_hot(labels, dimension=46):
    results = ny.zeros((len(labels), dimension))
    for i, label in enumerate(labels):
        results[i, label] = 1.
    return results
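to_one_hot is defined but never called in this snippet. Applying it would look like the following; the variable names are illustrative, and the commented-out Keras built-in produces the same encoding:

one_hot_train_labels = to_one_hot(train_labels)
one_hot_test_labels = to_one_hot(test_labels)

# Equivalent built-in:
# from tensorflow.keras.utils import to_categorical
# one_hot_train_labels = to_categorical(train_labels)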
Example #5
from tensorflow.python.keras.datasets import reuters

(train_data, train_labels), (test_data,
                             test_labels) = reuters.load_data(num_words=10000)
# Text output in TensorBoard

import tensorflow as tf
from tensorflow import keras
from tensorflow.python.keras.datasets import cifar100
from tensorflow.python.keras.datasets import reuters

# Load the Reuters dataset
INDEX_FROM = 3
START_CHAR = 1
(x_train, y_train), (x_test, y_test) = reuters.load_data(path="reuters.npz",
                                                         num_words=None,
                                                         skip_top=0,
                                                         maxlen=None,
                                                         test_split=0.2,
                                                         seed=113,
                                                         start_char=START_CHAR,
                                                         oov_char=2,
                                                         index_from=INDEX_FROM)

# Mapping function from id to word
word_index = reuters.get_word_index(path="reuters_word_index.json")
word_index = {k: (v + INDEX_FROM) for k, v in word_index.items()}
word_index["<PAD>"] = 0
word_index["<START>"] = START_CHAR  # 1
word_index["<UNK>"] = 2
id_to_word = {value: key for key, value in word_index.items()}


# Function that returns a Reuters newswire as a string
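The snippet ends at this comment without the function body. A minimal sketch consistent with the id_to_word mapping built above (the function name is an assumption):

def decode_newswire(sequence):
    # Map each id back to its token; unknown ids fall back to '<UNK>'.
    return ' '.join(id_to_word.get(i, '<UNK>') for i in sequence)

print(decode_newswire(x_train[0]))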