import re


def _split_into_words(queries):
    """Strip punctuation characters from each query and split on whitespace.

    Args:
        queries: iterable of query strings; it is wrapped in a pandas Series.

    Returns:
        A pandas Series with one entry per query; string entries become
        lists of whitespace-separated words.
    """
    # re.escape keeps characters such as ``\``, ``]``, ``^`` and ``-`` literal
    # inside the character class. Interpolated raw, a backslash would escape
    # the character that follows it instead of being stripped itself.
    punctuation_pattern = '[' + re.escape(punctuation) + ']'
    return pd.Series(queries).replace(
        punctuation_pattern, '', regex=True).str.split()


# Tokenise both query collections; the flattened word lists are kept as
# module-level names alongside the per-query word lists.
fixed_queries_to_words = _split_into_words(fixed_queries)
fixed_words = flatten_list(fixed_queries_to_words)

original_queries_to_words = _split_into_words(original_queries)
original_words = flatten_list(original_queries_to_words)

# Error model: feed it (original word, fixed word) pairs taken position by
# position from each original/fixed query pair.
error_model = ErrorModel()
for original_query, fixed_query in zip(original_queries_to_words,
                                       fixed_queries_to_words):
    # zip stops at the shorter of the two queries, so words without a
    # counterpart at the same position are skipped.
    for original_word, fixed_word in zip(original_query, fixed_query):
        error_model.update_statistics(original_word, fixed_word)
error_model.calculate_weights()

# Language model: feed it every word of every fixed query.
language_model = LanguageModel()
for fixed_query in fixed_queries_to_words:
    for word in fixed_query:
        language_model.update_statistics(word)
language_model.calculate_weights()

# Persist both models next to the script.
error_model.store_json('error.json')
language_model.store_json('language.json')


# In[ ]: