padded_english_sentences, padded_norwegian_sentences) # Count vocabulary in English and Norwegian sentences for sentence in english_train_sentences: english_language.count_words(sentence) for sentence in norwegian_train_sentences: norwegian_language.count_words(sentence) # Choose top N vocabulary in both languages N = 10000 english_language.top_n_words(N) norwegian_language.top_n_words(N) # Create dictionary to map words to idx english_language.index_words() norwegian_language.index_words() # Convert sentences into indices english_train_sentences_ids = convert_sentences_index( english_language, english_train_sentences) norwegian_train_sentences_ids = convert_sentences_index( norwegian_language, norwegian_train_sentences) english_test_sentences_ids = convert_sentences_index( english_language, english_test_sentences) norwegian_test_sentences_ids = convert_sentences_index( norwegian_language, norwegian_test_sentences) print(english_train_sentences_ids.shape) print(norwegian_train_sentences_ids.shape)