예제 #1
0
def generate_stats(title_list,
                   show_features,
                   save,
                   outpath,
                   patterns=PATTERNS,
                   outext=OUTEXT):
    """Print match statistics for every pattern and optionally save matches.

    For each pattern name, every loaded title is tested with
    ``match_feature``; the number of matches and the total number of
    titles are handed to ``print_stats``.

    Args:
        title_list: Passed unchanged to ``load_title_list`` to obtain
            the titles.
        show_features: When truthy, ``show_feature_matchers()`` is called
            before any processing.
        save: When truthy, the matched titles of each pattern are written
            as JSON to ``outpath``.
        outpath: Directory receiving the output files; created (including
            missing parent directories) when it does not exist.
        patterns: Iterable of pattern names given to ``match_feature``.
        outext: Suffix appended to the pattern name to build each output
            file name.
    """
    # Local import keeps this block self-contained; exist_ok avoids the
    # check-then-create race of isdir() + mkdir() and, unlike mkdir(),
    # nested output directories are created as well.
    from os import makedirs

    if show_features:
        show_feature_matchers()
    all_titles = load_title_list(title_list)
    n_titles = len(all_titles)
    for pattern_name in patterns:
        # The list only holds references to already-loaded titles, so it is
        # built unconditionally and doubles as the match counter.
        matched_titles = [
            title for title in tqdm(all_titles)
            if match_feature(pattern_name, title)
        ]
        print_stats(pattern_name, len(matched_titles), n_titles)
        if save:
            makedirs(outpath, exist_ok=True)
            outfilename = join(outpath, pattern_name + outext)
            with open(outfilename, 'w') as outfile:
                json.dump(matched_titles, outfile, indent=4)
예제 #2
0
def test_contain_colon(colon_or_minus_in, colon_or_minus_out):
    """Check the 'colon_or_minus' matcher against the expected result.

    Both arguments are presumably injected by pytest fixtures or
    parametrization defined outside this block.
    """
    observed = match_feature('colon_or_minus', colon_or_minus_in)
    assert observed == colon_or_minus_out
예제 #3
0
def test_contain_the(contains_the_in, contains_the_out):
    """Check the 'the_The_THE' matcher against the expected result.

    Both arguments are presumably injected by pytest fixtures or
    parametrization defined outside this block.
    """
    observed = match_feature('the_The_THE', contains_the_in)
    assert observed == contains_the_out
예제 #4
0
def test_contain_and(contains_and_in, contains_and_out):
    """Check the 'and_And_AND' matcher against the expected result.

    Both arguments are presumably injected by pytest fixtures or
    parametrization defined outside this block.
    """
    observed = match_feature('and_And_AND', contains_and_in)
    assert observed == contains_and_out
예제 #5
0
def test_contain_versus_vs(contains_vs_in, contains_vs_out):
    """Check the 'versus_vs' matcher against the expected result.

    Both arguments are presumably injected by pytest fixtures or
    parametrization defined outside this block.
    """
    observed = match_feature('versus_vs', contains_vs_in)
    assert observed == contains_vs_out
예제 #6
0
def test_starts_with_number(starts_with_number_in, starts_with_number_out):
    """Check the 'starts_with_number' matcher against the expected result.

    Both arguments are presumably injected by pytest fixtures or
    parametrization defined outside this block.
    """
    observed = match_feature('starts_with_number', starts_with_number_in)
    assert observed == starts_with_number_out
예제 #7
0
def test_two_numbers(contains_two_numbers_in, contains_two_numbers_out):
    """Check the 'contains_two_numbers' matcher against the expected result.

    Both arguments are presumably injected by pytest fixtures or
    parametrization defined outside this block.
    """
    observed = match_feature('contains_two_numbers', contains_two_numbers_in)
    assert observed == contains_two_numbers_out
예제 #8
0
def test_contains_one_number(contains_one_number_in, contains_one_number_out):
    """Check the 'contains_one_number' matcher against the expected result.

    Both arguments are presumably injected by pytest fixtures or
    parametrization defined outside this block.
    """
    observed = match_feature('contains_one_number', contains_one_number_in)
    assert observed == contains_one_number_out
예제 #9
0
def test_contains_number_letter(number_letter_in, number_letter_out):
    """Check the 'number_letter' matcher against the expected result.

    Both arguments are presumably injected by pytest fixtures or
    parametrization defined outside this block.
    """
    observed = match_feature('number_letter', number_letter_in)
    assert observed == number_letter_out
예제 #10
0
def test_contain_a_year(contains_a_year_in, contains_a_year_out):
    """Check the 'contains_a_year' matcher against the expected result.

    Both arguments are presumably injected by pytest fixtures or
    parametrization defined outside this block.
    """
    observed = match_feature('contains_a_year', contains_a_year_in)
    assert observed == contains_a_year_out
예제 #11
0
def test_contain_famous_patron(famous_patron_in, famous_patron_out):
    """Check the 'famous_patron' matcher against the expected result.

    Both arguments are presumably injected by pytest fixtures or
    parametrization defined outside this block.
    """
    observed = match_feature('famous_patron', famous_patron_in)
    assert observed == famous_patron_out
예제 #12
0
def test_contain_apostrophes(apostrophes_in, apostrophes_out):
    """Check the 'apostrophes' matcher against the expected result.

    Both arguments are presumably injected by pytest fixtures or
    parametrization defined outside this block.
    """
    observed = match_feature('apostrophes', apostrophes_in)
    assert observed == apostrophes_out
예제 #13
0
def test_editions(editions_in, editions_out):
    """Check the 'editions' matcher against the expected result.

    Both arguments are presumably injected by pytest fixtures or
    parametrization defined outside this block.
    """
    observed = match_feature('editions', editions_in)
    assert observed == editions_out
예제 #14
0
def test_contain_remastered(HD_remastered_in, HD_remastered_out):
    """Check the 'HD_remastered' matcher against the expected result.

    Both arguments are presumably injected by pytest fixtures or
    parametrization defined outside this block.
    """
    observed = match_feature('HD_remastered', HD_remastered_in)
    assert observed == HD_remastered_out
예제 #15
0
def test_contain_directorscut(directors_cut_in, directors_cut_out):
    """Check the 'directors_cut' matcher against the expected result.

    Both arguments are presumably injected by pytest fixtures or
    parametrization defined outside this block.
    """
    observed = match_feature('directors_cut', directors_cut_in)
    assert observed == directors_cut_out