def build_first_name_data():
    """Read the first-name CSV and pickle its contents.

    Calls ``read_csv`` with the ``'csv'`` and ``'columns'`` entries of
    ``FIRST_NAMES_PATHS``, then writes the data together with the result of
    ``get_longest_item`` to the ``'pkl'`` path via ``dump_data_to_pickle``.
    """
    print('\n***** Build first names.')

    csv_path = FIRST_NAMES_PATHS['csv']
    column_spec = FIRST_NAMES_PATHS['columns']
    names = read_csv(csv_path, column_spec)
    longest = get_longest_item(names)

    dump_data_to_pickle(FIRST_NAMES_PATHS['pkl'], names, longest)
def build_word_data():
    """Collect every word yielded by ``wn.words()`` and pickle the list.

    The words are materialized into a list, passed to ``get_longest_item``,
    and both the list and that result are written to ``WORDS_PATHS['pkl']``
    via ``dump_data_to_pickle``.

    NOTE(review): ``wn`` is presumably NLTK's WordNet corpus reader; confirm
    against the file's imports.
    """
    print('***** Build words.\n')
    # list() copies the iterable in one step; the element-by-element
    # append loop it replaces produced exactly the same list.
    words = list(wn.words())
    max_length_word = get_longest_item(words)
    dump_data_to_pickle(WORDS_PATHS['pkl'], words, max_length_word)
def build_word_counts():
    """Pickle the raw and normalized counts derived from the stored words.

    Loads the pickle at ``WORDS_PATHS['pkl']``, passes it through
    ``get_counts`` and then ``normalize_counts``, and writes the two results
    to ``WORDS_PATHS['pkl_cnts']`` and ``WORDS_PATHS['pkl_cnts_norm']``.
    """
    print('***** Build word counts.\n')

    raw_counts = get_counts(load_pickle(WORDS_PATHS['pkl']))
    normalized_counts = normalize_counts(raw_counts)

    # Order matters: raw counts are written first, then the normalized ones.
    outputs = (
        ('pkl_cnts', raw_counts),
        ('pkl_cnts_norm', normalized_counts),
    )
    for path_key, payload in outputs:
        dump_data_to_pickle(WORDS_PATHS[path_key], payload)
def build_surname_counts():
    """Pickle the raw and normalized counts derived from the stored surnames.

    Loads the pickle at ``SURNAMES_PATHS['pkl']``, passes it through
    ``get_counts`` and then ``normalize_counts``, and writes the two results
    to ``SURNAMES_PATHS['pkl_cnts']`` and ``SURNAMES_PATHS['pkl_cnts_norm']``.
    """
    print('***** Build surname counts.')

    raw_counts = get_counts(load_pickle(SURNAMES_PATHS['pkl']))
    normalized_counts = normalize_counts(raw_counts)

    # Order matters: raw counts are written first, then the normalized ones.
    outputs = (
        ('pkl_cnts', raw_counts),
        ('pkl_cnts_norm', normalized_counts),
    )
    for path_key, payload in outputs:
        dump_data_to_pickle(SURNAMES_PATHS[path_key], payload)
def build_first_name_counts():
    """Pickle the raw and normalized counts derived from the stored first names.

    Loads the pickle at ``FIRST_NAMES_PATHS['pkl']``, passes it through
    ``get_counts`` and then ``normalize_counts``, and writes the two results
    to ``FIRST_NAMES_PATHS['pkl_cnts']`` and
    ``FIRST_NAMES_PATHS['pkl_cnts_norm']``.
    """
    print('\n***** Build first name counts.')

    raw_counts = get_counts(load_pickle(FIRST_NAMES_PATHS['pkl']))
    normalized_counts = normalize_counts(raw_counts)

    # Order matters: raw counts are written first, then the normalized ones.
    outputs = (
        ('pkl_cnts', raw_counts),
        ('pkl_cnts_norm', normalized_counts),
    )
    for path_key, payload in outputs:
        dump_data_to_pickle(FIRST_NAMES_PATHS[path_key], payload)
def build_surname_data():
    """Read the surname CSV and pickle its contents.

    Calls ``read_csv`` with the ``'csv'`` and ``'columns'`` entries of
    ``SURNAMES_PATHS``, then writes the data together with the result of
    ``get_longest_item`` to the ``'pkl'`` path via ``dump_data_to_pickle``.
    """
    print('***** Build surnames.')

    csv_path = SURNAMES_PATHS['csv']
    column_spec = SURNAMES_PATHS['columns']
    surnames = read_csv(csv_path, column_spec)
    longest = get_longest_item(surnames)

    dump_data_to_pickle(SURNAMES_PATHS['pkl'], surnames, longest)