def test_preprocess_taxons():
    """Check how ``ProgramFilter.preprocess_taxons`` expands taxon patterns.

    Two input shapes are exercised against the module-level ``db``:

    * plain pattern strings, which are expected to come back as a flat,
      ordered list of the taxon names they match (a pattern matching
      nothing contributes no entry);
    * ``(predicate, pattern_1, pattern_2)`` triples, which are expected to
      come back as triples with the same first member and every
      combination of the names matched by the two patterns.
    """
    program_filter = ProgramFilter(db)

    # Case 1: plain string patterns.
    string_patterns = ["X/S/M.*", "O(?!/C).*", "Y/E", "non_matching_pattern"]
    expanded_names = program_filter.preprocess_taxons(string_patterns)
    print(sorted(expanded_names))
    expected_names = [
        "O",
        "O/J",
        "O/N",
        "O/N/P",
        "X/S/M",
        "X/S/M/L",
        "X/S/M/L/R",
        "X/S/M/L/R/D",
        "X/S/M/L/R/D/A",
        "X/S/M/L/V",
        "Y/E",
    ]
    # The comparison is against a list, so the order of the result matters.
    assert expanded_names == expected_names

    # Case 2: triples whose last two members are patterns.
    triple_patterns = [
        ("never mind", "O/J", "Y/E"),
        ("whatever", r"X/S/M/L/R/D.*", r"O/N.*"),
        ("who cares", r"X/S/M/L/R.*", r"O/N"),
    ]
    expanded_triples = program_filter.preprocess_taxons(triple_patterns)
    print(sorted(expanded_triples))
    expected_triples = [
        ("never mind", "O/J", "Y/E"),
        ("whatever", "X/S/M/L/R/D", "O/N"),
        ("whatever", "X/S/M/L/R/D", "O/N/P"),
        ("whatever", "X/S/M/L/R/D/A", "O/N"),
        ("whatever", "X/S/M/L/R/D/A", "O/N/P"),
        ("who cares", "X/S/M/L/R", "O/N"),
        ("who cares", "X/S/M/L/R/D", "O/N"),
        ("who cares", "X/S/M/L/R/D/A", "O/N"),
    ]
    assert expanded_triples == expected_triples
def test_include_taxons():
    """Check which programs ``ProgramFilter.include_taxons`` keeps selected.

    Every scenario starts from a fresh ``ProgramFilter(db)``, applies one
    inclusion command made of ``(predicate, taxon_1, taxon_2)`` triples, and
    compares the keys of ``selected_programs`` with the expected set of
    program names.
    """

    # The taxon "variable/assignment/single" is featured by assignment.py and collatz_print.py. In
    # collatz_print.py, it appears after a taxon "io/standard/print". Consequently, collatz_print.py
    # should be included in the results, but not the programs which import it: fizzbuzz.py and
    # is_even.py.
    dbf = ProgramFilter(db)
    dbf.include_taxons({("after", "variable/assignment/single", "io/standard/print")})
    print(set(dbf.selected_programs.keys()))
    assert set(dbf.selected_programs.keys()) == {"collatz_print.py"}

    # "operator/arithmetic/modulo" and "type/number/integer/literal" are both featured on
    # the same line in all programs except assignment.py
    dbf = ProgramFilter(db)
    dbf.include_taxons({("equals", "operator/arithmetic/modulo", "type/number/integer/literal")})
    print(set(dbf.selected_programs.keys()))
    assert set(dbf.selected_programs.keys()) == {"collatz_print.py", "is_even.py", "fizzbuzz.py"}

    # The same with "x == y" instead of "equals": the symbolic spelling of the predicate
    # must select exactly the same programs as its named alias above.
    dbf = ProgramFilter(db)
    dbf.include_taxons({("x == y", "operator/arithmetic/modulo", "type/number/integer/literal")})
    print(set(dbf.selected_programs.keys()))
    assert set(dbf.selected_programs.keys()) == {"collatz_print.py", "is_even.py", "fizzbuzz.py"}

    # "test/equality" is inside "subroutine/function" in is_even.py, which is not imported anywhere.
    dbf = ProgramFilter(db)
    dbf.include_taxons({("inside", "test/equality", "subroutine/function")})
    print(set(dbf.selected_programs.keys()))
    assert set(dbf.selected_programs.keys()) == {"is_even.py"}

    # "test/equality" is inside "subroutine/function" in is_even.py and inside
    # "subroutine/procedure" in collatz_print.py. Both will be included.
    dbf = ProgramFilter(db)
    taxons = dbf.preprocess_taxons([("inside", "test/equality", "subroutine/.*")])
    print(taxons)
    # The pattern "subroutine/.*" is first expanded into one triple per matching taxon.
    assert taxons == [
        ("inside", "test/equality", "subroutine/argument/arg"),
        ("inside", "test/equality", "subroutine/function"),
        ("inside", "test/equality", "subroutine/predicate"),
        ("inside", "test/equality", "subroutine/procedure"),
    ]
    dbf.include_taxons(taxons)
    print(set(dbf.selected_programs.keys()))
    assert set(dbf.selected_programs.keys()) == {"collatz_print.py", "is_even.py"}

    # "call/function/builtin/range" is not inside "flow/conditional" anywhere.
    dbf = ProgramFilter(db)
    dbf.include_taxons([("inside", "call/function/builtin/range", "flow/conditional")])
    print(set(dbf.selected_programs.keys()))
    assert set(dbf.selected_programs.keys()) == set()