import numpy as np
import random

from rnn import RNN
from data import train_data, test_data

# Build the vocabulary from the training data
vocab = list(set([w for text in train_data.keys() for w in text.split(' ')]))
vocab_size = len(vocab)
print('Unique words in the training data: {}'.format(vocab_size))

# Assign each word in the vocabulary an integer index
word_to_idx = {w: i for i, w in enumerate(vocab)}
idx_to_word = {i: w for i, w in enumerate(vocab)}

def create_inputs(text):
    # Return a list of one-hot column vectors, one per word in the text
    inputs = []
    for w in text.split(' '):
        v = np.zeros((vocab_size, 1))
        v[word_to_idx[w]] = 1
        inputs.append(v)
    return inputs

def softmax(xs):
    # Turn raw output scores into probabilities
    return np.exp(xs) / sum(np.exp(xs))

# Initialize the RNN: vocab_size-dimensional inputs, 2 output classes
rnn = RNN(vocab_size, 2)
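
# A minimal sketch of pushing one phrase through the network. It assumes the
# RNN class from rnn.py exposes a forward(inputs) method returning the final
# output vector and hidden state; that signature, and the example phrase
# 'i am very good', are assumptions for illustration, not shown in this file.
inputs = create_inputs('i am very good')   # list of one-hot vectors
out, h = rnn.forward(inputs)               # out has shape (2, 1): one score per class
probs = softmax(out)                       # convert scores to probabilities
print('Predicted class:', int(np.argmax(probs)))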
import numpy as np
import random

from rnn import RNN
from data import train_data, test_data

# Construct a vocabulary of the words that exist in our data:
vocab = list(
    set([word for phrase in train_data.keys() for word in phrase.split(" ")]))
vocab_size = len(vocab)

"""
Assign an integer index to represent each word in vocab.
We need to represent each word with an index because RNNs can't understand
words directly; we have to give them numbers.
"""
word_to_idx = {w: i for i, w in enumerate(vocab)}
idx_to_word = {i: w for i, w in enumerate(vocab)}

"""
Each input xi to the RNN is a vector, so we use one-hot encoding.
We have 18 unique words in the vocabulary, so each xi will be an
18-dimensional one-hot vector.
create_inputs returns a list of one-hot vectors that represent the words
in the input text string.
"""
def create_inputs(text):
    res = []
    for word in text.split(' '):
        vec = np.zeros((vocab_size, 1))  # start off as an array of zeros
        vec[word_to_idx[word]] = 1       # set the entry for this word to 1
        res.append(vec)
    return res
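
# A small illustration of the one-hot encoding described above. The phrase
# 'i am happy' is a hypothetical example; the real phrases come from data.py,
# which is not shown here, and every word must already be in vocab.
example = create_inputs('i am happy')
print(len(example))            # 3 vectors, one per word
print(example[0].shape)        # (vocab_size, 1)
print(int(example[0].sum()))   # 1 -> exactly one entry is "hot"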