def test_apply_for_each_key():
    input_dict = {'key1': 1, 'key2': 2}

    def func(x):
        return x * 2

    expected = {'key1': 2, 'key2': 4}
    res = utils.apply_for_each_key(input_dict, func)
    assert res == expected
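The test above pins down the contract of `utils.apply_for_each_key`: apply a function to every value of a dict and return a new dict with the same keys. A minimal sketch of such a helper, inferred from the test alone (the actual implementation in `utils` may differ), could look like:

import typing as t

K = t.TypeVar('K')
V = t.TypeVar('V')
R = t.TypeVar('R')


def apply_for_each_key(data: t.Dict[K, V],
                       func: t.Callable[[V], R]) -> t.Dict[K, R]:
    # Apply `func` to every value while keeping the original keys.
    return {key: func(value) for key, value in data.items()}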
def lemmatize_all(self, data):
    get_lems = lambda examples: [self.lemmatize(ex) for ex in examples]
    return utils.apply_for_each_key(data, get_lems)
def remove_stop_words_for_all(self, data):
    remove_sw = lambda examples: [
        self.remove_stop_words(ex) for ex in examples
    ]
    return utils.apply_for_each_key(data, remove_sw)
def tokenize_all_examples(self, data):
    tokenize = lambda data_list: [
        self.tokenize_one_example(d) for d in data_list
    ]
    return utils.apply_for_each_key(data, tokenize)
def to_lower_case_all(self, data):
    to_lower_case = lambda examples: [
        self.to_lower_case_one(ex) for ex in examples
    ]
    return utils.apply_for_each_key(data, to_lower_case)
def process_merged_sentences_for_all(self, data) -> t.Dict[str, np.ndarray]:
    # Stack the per-example vectors into one np.ndarray per key.
    get_matrix = lambda examples: np.array(
        [self.process_words_merged_matrix(ex) for ex in examples])
    return utils.apply_for_each_key(data, get_matrix)
def save_merged_matrix(self, matrix):
    # Numpy arrays are not directly serializable, so convert them to lists first.
    as_list = lambda np_array: np_array.tolist()
    serializable_matrix = utils.apply_for_each_key(matrix, as_list)
    dump_merged_word_matrix(serializable_matrix)
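All of these methods follow the same pattern: `data` is a dict mapping keys (for example split names such as 'train' and 'test') to lists of examples, and each step transforms every key's list at once via `apply_for_each_key`. A hedged end-to-end usage sketch, assuming a hypothetical class name `Preprocessor` and a plausible ordering of the steps (neither is stated in the source):

# Hypothetical usage; the class name `Preprocessor`, the sample data,
# and the step order are assumptions for illustration only.
data = {
    'train': ['The cats are sleeping.', 'Dogs bark loudly.'],
    'test': ['Birds were flying.'],
}

pre = Preprocessor()
data = pre.tokenize_all_examples(data)       # split each example into tokens
data = pre.to_lower_case_all(data)           # lower-case every example
data = pre.remove_stop_words_for_all(data)   # drop stop words per example
data = pre.lemmatize_all(data)               # reduce tokens to lemmas
matrices = pre.process_merged_sentences_for_all(data)  # dict of np.ndarray
pre.save_merged_matrix(matrices)             # convert to lists and dump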