def make_train_set(level=2, lower_limit=200, upper_limit=1000):
    """Build the training set from the filtered data and save it as CSV.

    Reads the filtered data from ``FILTERED_DATA_PATH``, builds per-category
    corpora at the requested category level, filters them by class size and
    writes the result to ``CATEGORIES_CORPORA_PATH``.

    The defaults reproduce the previously hard-coded values, so calling
    ``make_train_set()`` with no arguments behaves as before.

    Args:
        level: category level passed to ``get_n_level_categories_corpora``.
        lower_limit: lower class-size limit passed to
            ``filter_corpora_by_each_class_size``.
        upper_limit: upper class-size limit passed to
            ``filter_corpora_by_each_class_size``.

    NOTE(review): whether the limits are inclusive, and whether oversized
    classes are dropped or truncated, is decided by
    ``filter_corpora_by_each_class_size`` -- confirm there.
    """
    data = read_as_json(FILTERED_DATA_PATH)
    corpora = get_n_level_categories_corpora(data, level=level)
    corpora = filter_corpora_by_each_class_size(
        corpora,
        lower_limit=lower_limit,
        upper_limit=upper_limit,
    )
    save_json_as_csv(corpora, CATEGORIES_CORPORA_PATH)
def print_categories():
    """Print every item category found in the filtered dataset.

    Reads the filtered data from ``FILTERED_DATA_PATH``, collects the
    categories via ``get_all_level_categories`` and prints them one per
    line in sorted order.
    """
    data = read_as_json(FILTERED_DATA_PATH)
    categories = get_all_level_categories(data)
    for category in sorted(categories):
        # Parenthesised single-argument form prints identically under the
        # Python 2 print statement and the Python 3 print function.
        print(category)