# Apply the top-N word selection to each language object.
# NOTE(review): presumably this restricts each vocabulary to its N most
# frequent words -- confirm against the language class definition.
english_language.top_n_words(N)
norwegian_language.top_n_words(N)

# Create dictionary to map words to idx
english_language.index_words()
norwegian_language.index_words()

# Convert sentences into indices
english_train_sentences_ids = convert_sentences_index(
    english_language, english_train_sentences)
norwegian_train_sentences_ids = convert_sentences_index(
    norwegian_language, norwegian_train_sentences)
english_test_sentences_ids = convert_sentences_index(
    english_language, english_test_sentences)
norwegian_test_sentences_ids = convert_sentences_index(
    norwegian_language, norwegian_test_sentences)

# Sanity check on the converted training data.
# NOTE(review): relies on convert_sentences_index returning an object with a
# `.shape` attribute (array-like) -- confirm against its definition.
print(english_train_sentences_ids.shape)
print(norwegian_train_sentences_ids.shape)

# Vocabulary size of English and Norwegian
english_vocab_size = english_language.vocab_size()
norwegian_vocab_size = norwegian_language.vocab_size()

print("Number of Training Sentences: %d" % len(english_train_sentences))
print("English Vocab: %d" % english_vocab_size)
print("Norwegian Vocab: %d" % norwegian_vocab_size)

# Persist both language objects. The context managers guarantee each file is
# flushed and closed as soon as its dump finishes, instead of leaving the
# handle open until it happens to be garbage-collected.
with open('./english_language', 'wb') as english_file:
    pkl.dump(english_language, english_file)
with open('./norwegian_language', 'wb') as norwegian_file:
    pkl.dump(norwegian_language, norwegian_file)