def generate_stats(title_list, show_features, save, outpath, patterns=PATTERNS, outext=OUTEXT):
    """Print match statistics for every pattern and optionally save the matches.

    For each name in ``patterns`` the loaded titles are scanned with
    ``match_feature`` and the number of matches is reported through
    ``print_stats`` together with the total number of titles.

    Args:
        title_list: Handed unchanged to ``load_title_list`` to obtain the titles.
        show_features: When truthy, ``show_feature_matchers()`` is called first.
        save: When truthy, the matching titles of each pattern are written as
            JSON (indent 4) to ``<outpath>/<pattern_name><outext>``.
        outpath: Output directory; created (including missing parents) on
            demand when ``save`` is truthy.
        patterns: Iterable of pattern names passed to ``match_feature``.
        outext: Suffix appended to the pattern name to form the file name.
    """
    # Local import keeps this block self-contained; makedirs replaces the
    # isdir()/mkdir() pair, which failed on nested paths and was racy.
    from os import makedirs

    if show_features:
        show_feature_matchers()

    all_titles = load_title_list(title_list)
    n_titles = len(all_titles)

    for pattern_name in patterns:
        matched_titles = [
            title for title in tqdm(all_titles)
            if match_feature(pattern_name, title)
        ]
        print_stats(pattern_name, len(matched_titles), n_titles)

        if not save:
            continue

        # Created lazily so nothing touches the filesystem unless a file is
        # actually about to be written.
        makedirs(outpath, exist_ok=True)
        outfilename = join(outpath, pattern_name + outext)
        with open(outfilename, 'w') as outfile:
            json.dump(matched_titles, outfile, indent=4)
def test_contain_colon(colon_or_minus_in, colon_or_minus_out):
    """Check the 'colon_or_minus' matcher against the expected result.

    Both arguments are supplied by the test runner (presumably pytest
    fixtures defined elsewhere).
    """
    actual = match_feature('colon_or_minus', colon_or_minus_in)
    assert actual == colon_or_minus_out
def test_contain_the(contains_the_in, contains_the_out):
    """Check the 'the_The_THE' matcher against the expected result.

    Both arguments are supplied by the test runner (presumably pytest
    fixtures defined elsewhere).
    """
    actual = match_feature('the_The_THE', contains_the_in)
    assert actual == contains_the_out
def test_contain_and(contains_and_in, contains_and_out):
    """Check the 'and_And_AND' matcher against the expected result.

    Both arguments are supplied by the test runner (presumably pytest
    fixtures defined elsewhere).
    """
    actual = match_feature('and_And_AND', contains_and_in)
    assert actual == contains_and_out
def test_contain_versus_vs(contains_vs_in, contains_vs_out):
    """Check the 'versus_vs' matcher against the expected result.

    Both arguments are supplied by the test runner (presumably pytest
    fixtures defined elsewhere).
    """
    actual = match_feature('versus_vs', contains_vs_in)
    assert actual == contains_vs_out
def test_starts_with_number(starts_with_number_in, starts_with_number_out):
    """Check the 'starts_with_number' matcher against the expected result.

    Both arguments are supplied by the test runner (presumably pytest
    fixtures defined elsewhere).
    """
    actual = match_feature('starts_with_number', starts_with_number_in)
    assert actual == starts_with_number_out
def test_two_numbers(contains_two_numbers_in, contains_two_numbers_out):
    """Check the 'contains_two_numbers' matcher against the expected result.

    Both arguments are supplied by the test runner (presumably pytest
    fixtures defined elsewhere).
    """
    actual = match_feature('contains_two_numbers', contains_two_numbers_in)
    assert actual == contains_two_numbers_out
def test_contains_one_number(contains_one_number_in, contains_one_number_out):
    """Check the 'contains_one_number' matcher against the expected result.

    Both arguments are supplied by the test runner (presumably pytest
    fixtures defined elsewhere).
    """
    actual = match_feature('contains_one_number', contains_one_number_in)
    assert actual == contains_one_number_out
def test_contains_number_letter(number_letter_in, number_letter_out):
    """Check the 'number_letter' matcher against the expected result.

    Both arguments are supplied by the test runner (presumably pytest
    fixtures defined elsewhere).
    """
    actual = match_feature('number_letter', number_letter_in)
    assert actual == number_letter_out
def test_contain_a_year(contains_a_year_in, contains_a_year_out):
    """Check the 'contains_a_year' matcher against the expected result.

    Both arguments are supplied by the test runner (presumably pytest
    fixtures defined elsewhere).
    """
    actual = match_feature('contains_a_year', contains_a_year_in)
    assert actual == contains_a_year_out
def test_contain_famous_patron(famous_patron_in, famous_patron_out):
    """Check the 'famous_patron' matcher against the expected result.

    Both arguments are supplied by the test runner (presumably pytest
    fixtures defined elsewhere).
    """
    actual = match_feature('famous_patron', famous_patron_in)
    assert actual == famous_patron_out
def test_contain_apostrophes(apostrophes_in, apostrophes_out):
    """Check the 'apostrophes' matcher against the expected result.

    Both arguments are supplied by the test runner (presumably pytest
    fixtures defined elsewhere).
    """
    actual = match_feature('apostrophes', apostrophes_in)
    assert actual == apostrophes_out
def test_editions(editions_in, editions_out):
    """Check the 'editions' matcher against the expected result.

    Both arguments are supplied by the test runner (presumably pytest
    fixtures defined elsewhere).
    """
    actual = match_feature('editions', editions_in)
    assert actual == editions_out
def test_contain_remastered(HD_remastered_in, HD_remastered_out):
    """Check the 'HD_remastered' matcher against the expected result.

    Both arguments are supplied by the test runner (presumably pytest
    fixtures defined elsewhere).
    """
    actual = match_feature('HD_remastered', HD_remastered_in)
    assert actual == HD_remastered_out
def test_contain_directorscut(directors_cut_in, directors_cut_out):
    """Check the 'directors_cut' matcher against the expected result.

    Both arguments are supplied by the test runner (presumably pytest
    fixtures defined elsewhere).
    """
    actual = match_feature('directors_cut', directors_cut_in)
    assert actual == directors_cut_out