def test_preprocess_taxons():
    """Check the expansion of taxon patterns by `preprocess_taxons`.

    Two input shapes are exercised: plain patterns, and
    (predicate, pattern, pattern) triples. In both cases the result is
    compared against a list given in sorted order.
    """
    program_filter = ProgramFilter(db)

    # Plain patterns: the last one matches nothing and contributes nothing.
    patterns = ["X/S/M.*", "O(?!/C).*", "Y/E", "non_matching_pattern"]
    expanded = program_filter.preprocess_taxons(patterns)
    print(sorted(expanded))
    expected_taxons = [
        "O",
        "O/J",
        "O/N",
        "O/N/P",
        "X/S/M",
        "X/S/M/L",
        "X/S/M/L/R",
        "X/S/M/L/R/D",
        "X/S/M/L/R/D/A",
        "X/S/M/L/V",
        "Y/E",
    ]
    assert expanded == expected_taxons

    # Triples: every combination of the taxons matched by the two patterns
    # is expected, the predicate being carried over unchanged.
    triples = [
        ("never mind", "O/J", "Y/E"),
        ("whatever", r"X/S/M/L/R/D.*", r"O/N.*"),
        ("who cares", r"X/S/M/L/R.*", r"O/N"),
    ]
    expanded = program_filter.preprocess_taxons(triples)
    print(sorted(expanded))
    expected_triples = [
        ("never mind", "O/J", "Y/E"),
        ("whatever", "X/S/M/L/R/D", "O/N"),
        ("whatever", "X/S/M/L/R/D", "O/N/P"),
        ("whatever", "X/S/M/L/R/D/A", "O/N"),
        ("whatever", "X/S/M/L/R/D/A", "O/N/P"),
        ("who cares", "X/S/M/L/R", "O/N"),
        ("who cares", "X/S/M/L/R/D", "O/N"),
        ("who cares", "X/S/M/L/R/D/A", "O/N"),
    ]
    assert expanded == expected_triples
def test_programs_of_pattern():
    """Check which program names `programs_of_pattern` returns for a few regexes."""
    program_filter = ProgramFilter(db)
    patterns = [r"prg[1-3]\.py", r"prg[7-9]\.py", "non_matching_pattern"]

    # Merge the matches of every pattern into a single set.
    matched = set()
    for pattern in patterns:
        matched.update(program_filter.programs_of_pattern(pattern))

    print(sorted(matched))
    expected = ["prg1.py", "prg2.py", "prg3.py", "prg7.py", "prg8.py", "prg9.py"]
    assert sorted(matched) == expected
def test_taxons_of_programs():
    """Check that `taxons_of_programs` returns the union of the programs' taxons.

    The unknown program name in the request must not affect the result.
    """
    program_filter = ProgramFilter(db)
    requested = {"prg8.py", "prg9.py", "non_existing_program"}
    taxons = program_filter.taxons_of_programs(requested)

    # Build the expected union straight from the filter's program table.
    expected = set()
    for program_name in ("prg8.py", "prg9.py"):
        expected.update(program_filter.db_programs[program_name]["taxons"])

    print(sorted(taxons))
    assert taxons == expected
def test_exclude_taxons():
    """Check that `exclude_taxons` leaves only programs free of the given taxons."""
    program_filter = ProgramFilter(db)
    excluded = ["O/J", "X/S/M/L", "non_existing_taxon"]
    program_filter.exclude_taxons(excluded)
    print(sorted(program_filter.selected_programs))

    survivors = {"prg2.py", "prg5.py"}
    assert program_filter.selected_programs.keys() == survivors

    # Cross-check against the raw database: no survivor lists an excluded taxon.
    for taxon in excluded:
        for program_name in ("prg2.py", "prg5.py"):
            assert taxon not in db["programs"][program_name]["taxons"]
def test_programs_of_taxons():
    """Check `programs_of_taxons(..., follow=False)` against the raw database.

    A program must be returned exactly when its own taxon list shares at
    least one element with the requested taxons.
    """
    program_filter = ProgramFilter(db)
    wanted = {"X/S/M/L/R/D/A", "X/S/M/L/R/D", "non_existing_taxon"}
    found = program_filter.programs_of_taxons(wanted, follow=False)
    print(sorted(found))

    for program_name, program_data in db["programs"].items():
        features_wanted = bool(wanted.intersection(program_data["taxons"]))
        assert (program_name in found) == features_wanted
def test_impart_programs():
    """Check the effect of `impart_programs` on the selection and on the imparted knowledge."""
    program_filter = ProgramFilter(db)
    imparted = {"prg1.py", "prg2.py", "non_existing_program"}
    program_filter.impart_programs(imparted)

    # The imparted programs leave the selection; every other one stays.
    print(sorted(program_filter.selected_programs))
    remaining = set(db["programs"]) - imparted
    assert program_filter.selected_programs.keys() == remaining

    # Taxons of the database that are still not part of the imparted knowledge.
    print(sorted(program_filter.imparted_knowledge))
    not_imparted = set(db["taxons"]) - program_filter.imparted_knowledge
    assert not_imparted == {
        "O/C/F",
        "O/C/F/U",
        "X/S/M/L/R/D/A",
        "Y/E",
    }
def test_impart_taxons():
    """Check that imparting a taxon leaves the program selection untouched."""
    # Imparting a knowledge decreases the learning cost of the corresponding
    # taxons, but has no effect whatsoever on the selected programs.
    program_filter = ProgramFilter(db)
    program_filter.impart_taxons({"flow/conditional"})

    selected = set(program_filter.selected_programs.keys())
    print(selected)
    assert selected == {
        "assignment.py",
        "collatz_print.py",
        "fizzbuzz.py",
        "is_even.py",
    }
def test_include_taxons():
    """Check which programs remain selected after `include_taxons`."""
    program_filter = ProgramFilter(db)
    included = ["O/J", "X/S/M/L", "non_existing_taxon"]
    program_filter.include_taxons(included)
    print(sorted(program_filter.selected_programs))

    expected = {
        "prg1.py",
        "prg3.py",
        "prg4.py",
        "prg6.py",
        "prg7.py",
        "prg8.py",
        "prg9.py",
    }
    assert program_filter.selected_programs.keys() == expected
def test_impart_taxons():
    """Check that `impart_taxons` records each taxon together with all its prefixes.

    This holds even for a taxon that does not exist in the database.
    """
    program_filter = ProgramFilter(db)
    imparted = ["O/J", "X/S/M/L", "non/existing/taxon"]
    program_filter.impart_taxons(imparted)
    print(sorted(program_filter.imparted_knowledge))

    expected_knowledge = {
        "O",
        "O/J",
        "X",
        "X/S",
        "X/S/M",
        "X/S/M/L",
        "non",
        "non/existing",
        "non/existing/taxon",
    }
    assert program_filter.imparted_knowledge == expected_knowledge
def test_init():
    """Check that a fresh filter selects the four programs of the database."""
    program_filter = ProgramFilter(db)
    print(program_filter.selected_programs)

    expected = {
        "assignment.py",
        "collatz_print.py",
        "fizzbuzz.py",
        "is_even.py",
    }
    assert set(program_filter.selected_programs.keys()) == expected
def test_taxa_of_pattern():
    """Check which taxa `taxa_of_pattern` returns for a few regexes."""
    program_filter = ProgramFilter(db)
    patterns = ["X/S/M", "O(?!/C)", "Y/E$", "non_matching_pattern"]

    # Merge the matches of every pattern into a single set.
    matched = set()
    for pattern in patterns:
        matched.update(program_filter.taxa_of_pattern(pattern))

    print(sorted(matched))
    expected = [
        "O",
        "O/J",
        "O/N",
        "O/N/P",
        "X/S/M",
        "X/S/M/L",
        "X/S/M/L/R",
        "X/S/M/L/R/D",
        "X/S/M/L/R/D/A",
        "X/S/M/L/V",
        "Y/E",
    ]
    assert sorted(matched) == expected
def test_taxa_of_programs():
    """Compare `taxa_of_programs` with and without following the importations.

    Taxa whose name starts with "meta/" are left out of every comparison.
    """

    def without_meta(taxa):
        # Keep only the taxa which are not under the "meta/" prefix.
        return {taxon for taxon in taxa if not taxon.startswith("meta/")}

    program_filter = ProgramFilter(db)

    # collatz is imported by fizzbuzz
    collatz_taxa = without_meta(
        program_filter.taxa_of_programs({"collatz.py"}, follow=False)
    )
    print(sorted(collatz_taxa))
    print()

    # When following the importations, in addition to its own taxa, fizzbuzz
    # features all those of collatz...
    followed_taxa = without_meta(
        program_filter.taxa_of_programs({"fizzbuzz.py"}, follow=True)
    )
    print(sorted(followed_taxa))
    assert followed_taxa >= collatz_taxa
    assert followed_taxa - collatz_taxa  # ... and at least one more

    # When not following the importations, fizzbuzz is limited to a strict
    # subset of the taxa above.
    direct_taxa = without_meta(
        program_filter.taxa_of_programs({"fizzbuzz.py"}, follow=False)
    )
    print(sorted(direct_taxa))
    assert followed_taxa >= direct_taxa
    assert followed_taxa - direct_taxa
def test_programs_of_taxons():
    """Check `programs_of_taxons` with and without following the importations."""
    program_filter = ProgramFilter(db)
    taxons = {"variable/assignment/single"}

    # The taxon "variable/assignment/single" is featured by assignment.py and
    # collatz_print.py: this corresponds to follow=False. It is indirectly
    # featured by fizzbuzz.py (which imports collatz_print.py) and by
    # is_even.py (which imports fizzbuzz.py): their addition corresponds to
    # follow=True.
    expectations = (
        (False, {"assignment.py", "collatz_print.py"}),
        (True, {"assignment.py", "collatz_print.py", "fizzbuzz.py", "is_even.py"}),
    )
    for follow, expected in expectations:
        programs = program_filter.programs_of_taxons(taxons, follow=follow)
        print(set(programs))
        assert set(programs) == expected
def test_include_programs():
    """Check that `include_programs` keeps exactly the requested program."""
    cases = (
        # The program collatz.py is self-contained. Including it has no other
        # effect on the selection.
        "collatz.py",
        # Including a program does not include the programs it imports.
        "fizzbuzz.py",
    )
    for program_name in cases:
        program_filter = ProgramFilter(db)
        program_filter.include_programs({program_name})
        print(program_filter.selected_programs)
        assert program_filter.selected_programs == {program_name}
def test_impart_taxons():
    """Check what `impart_taxons` does when it is given a triple."""
    # Imparting a triple doesn't make much sense. Currently, it comes down to
    # imparting the two taxons, and ignoring the predicate.
    program_filter = ProgramFilter(db)
    triple = ("equals", "operator/arithmetic/modulo", "type/number/integer")
    program_filter.impart_taxons({triple})

    # The selection is expected to still hold the four programs.
    selected = set(program_filter.selected_programs.keys())
    print(selected)
    assert selected == {
        "assignment.py",
        "collatz_print.py",
        "fizzbuzz.py",
        "is_even.py",
    }

    # Both taxons of the triple are imparted, along with their prefixes.
    print(program_filter.imparted_knowledge)
    expected_knowledge = {
        "operator",
        "operator/arithmetic",
        "operator/arithmetic/modulo",
        "type",
        "type/number",
        "type/number/integer",
    }
    assert program_filter.imparted_knowledge == expected_knowledge
def test_include_taxons():
    """Check that `include_taxons` keeps only the programs directly featuring a taxon.

    Each case starts from a fresh filter. Programs which feature the taxon
    only indirectly (through an importation) are not kept; note that this
    behavior contrasts with that of exclude_taxons.
    """
    cases = (
        # Directly featured by assignment.py and collatz_print.py, but only
        # indirectly by the other programs.
        ("variable/assignment/single", {"assignment.py", "collatz_print.py"}),
        # Directly featured by collatz_print.py only.
        ("operator/arithmetic/addition", {"collatz_print.py"}),
        # Featured by collatz_print.py, fizzbuzz.py, and indirectly by
        # is_even.py: only the former two are kept.
        ("flow/conditional", {"collatz_print.py", "fizzbuzz.py"}),
        # Directly featured by fizzbuzz.py only.
        ("type/sequence/string/literal", {"fizzbuzz.py"}),
    )
    for taxon, expected in cases:
        program_filter = ProgramFilter(db)
        program_filter.include_taxons({taxon})
        selected = set(program_filter.selected_programs.keys())
        print(selected)
        assert selected == expected
import json from pathlib import Path import pytest import context from paroxython.filter_programs import ProgramFilter from paroxython.compare_spans import compare_spans db = json.loads(Path("examples/mini/programs_db.json").read_text()) dbf = ProgramFilter(db) triple_data = [ ( ( "var/assignment/explicit/single", # featured by assignment.py and collatz.py compare_spans["after"], # but after "call/function/builtin/print", # this taxon in collatz.py only. ), {"collatz.py"}, ), ( ( "operator/arithmetic/addition", # this taxon is featured by collatz.py only compare_spans["equals"], # and on the same line "operator/arithmetic/multiplication", # as that taxon ), {"collatz.py"}, ), ( (
def test_include_taxons():
    """Check `include_taxons` when it is given (predicate, taxon, taxon) triples."""
    # The taxon "variable/assignment/single" is featured by assignment.py and
    # collatz_print.py. In collatz_print.py, it appears after a taxon
    # "io/standard/print". Consequently, this program should be included in
    # the results, but not the programs which import it: fizzbuzz.py and
    # is_even.py.
    program_filter = ProgramFilter(db)
    triple = ("after", "variable/assignment/single", "io/standard/print")
    program_filter.include_taxons({triple})
    selected = set(program_filter.selected_programs.keys())
    print(selected)
    assert selected == {"collatz_print.py"}

    # "operator/arithmetic/modulo" and "type/number/integer/literal" are both
    # featured on the same line in all programs except assignment.py.
    program_filter = ProgramFilter(db)
    triple = ("equals", "operator/arithmetic/modulo", "type/number/integer/literal")
    program_filter.include_taxons({triple})
    selected = set(program_filter.selected_programs.keys())
    print(selected)
    assert selected == {"collatz_print.py", "is_even.py", "fizzbuzz.py"}

    # NOTE(review): this case was announced as 'the same with "x == y" instead
    # of "equals"', but the predicate below is still "equals", which makes it
    # a repetition of the previous case. Confirm which predicate was intended.
    program_filter = ProgramFilter(db)
    triple = ("equals", "operator/arithmetic/modulo", "type/number/integer/literal")
    program_filter.include_taxons({triple})
    selected = set(program_filter.selected_programs.keys())
    print(selected)
    assert selected == {"collatz_print.py", "is_even.py", "fizzbuzz.py"}

    # "test/equality" is inside "subroutine/function" in is_even.py, which is
    # not imported anywhere.
    program_filter = ProgramFilter(db)
    triple = ("inside", "test/equality", "subroutine/function")
    program_filter.include_taxons({triple})
    selected = set(program_filter.selected_programs.keys())
    print(selected)
    assert selected == {"is_even.py"}

    # "test/equality" is inside "subroutine/function" in is_even.py and inside
    # "subroutine/procedure" in collatz_print.py. Both will be included, once
    # the pattern of the second taxon has been expanded by preprocess_taxons.
    program_filter = ProgramFilter(db)
    pattern_triple = ("inside", "test/equality", "subroutine/.*")
    expanded = program_filter.preprocess_taxons([pattern_triple])
    print(expanded)
    assert expanded == [
        ("inside", "test/equality", "subroutine/argument/arg"),
        ("inside", "test/equality", "subroutine/function"),
        ("inside", "test/equality", "subroutine/predicate"),
        ("inside", "test/equality", "subroutine/procedure"),
    ]
    program_filter.include_taxons(expanded)
    selected = set(program_filter.selected_programs.keys())
    print(selected)
    assert selected == {"collatz_print.py", "is_even.py"}

    # "call/function/builtin/range" is not inside "flow/conditional" anywhere.
    program_filter = ProgramFilter(db)
    triple = ("inside", "call/function/builtin/range", "flow/conditional")
    program_filter.include_taxons([triple])
    selected = set(program_filter.selected_programs.keys())
    print(selected)
    assert selected == set()
def test_exclude_taxons():
    """Check that `exclude_taxons` also drops the programs featuring a taxon indirectly.

    Each case starts from a fresh filter.
    """
    cases = (
        # Featured by assignment.py and collatz_print.py. Indirectly featured
        # by fizzbuzz.py (which imports collatz_print.py) and by is_even.py
        # (which imports fizzbuzz.py). All four programs are excluded.
        ("variable/assignment/single", set()),
        # Excluding this taxon is expected to keep only assignment.py.
        # NOTE(review): the original comment of this case described
        # "operator/arithmetic/addition" (featured by collatz_print.py, and
        # indirectly by fizzbuzz.py and is_even.py), whereas the taxon
        # actually excluded is "io/standard/print". Confirm which was intended.
        ("io/standard/print", {"assignment.py"}),
        # Featured by collatz_print.py, fizzbuzz.py, and indirectly by
        # is_even.py. Only assignment.py is kept.
        ("flow/conditional", {"assignment.py"}),
        # Featured by fizzbuzz.py, and indirectly by is_even.py. Only
        # assignment.py and collatz_print.py are kept.
        ("type/sequence/string/literal", {"assignment.py", "collatz_print.py"}),
    )
    for taxon, expected in cases:
        program_filter = ProgramFilter(db)
        program_filter.exclude_taxons({taxon})
        selected = set(program_filter.selected_programs.keys())
        print(selected)
        assert selected == expected
def test_include_programs():
    """Check that `include_programs` also keeps the programs imported by the request.

    Each case starts from a fresh filter.
    """
    cases = (
        # The program collatz_print.py is self-contained. Including it has no
        # other effect on the selection.
        ("collatz_print.py", {"collatz_print.py"}),
        # The program fizzbuzz.py imports collatz_print.py. Including the
        # former includes both.
        ("fizzbuzz.py", {"fizzbuzz.py", "collatz_print.py"}),
        # The program is_even.py imports fizzbuzz.py, which imports
        # collatz_print.py. Including the former includes the three of them.
        ("is_even.py", {"is_even.py", "collatz_print.py", "fizzbuzz.py"}),
    )
    for program_name, expected in cases:
        program_filter = ProgramFilter(db)
        program_filter.include_programs({program_name})
        selected = set(program_filter.selected_programs.keys())
        print(selected)
        assert selected == expected
def test_impart_programs():
    """Check that `impart_programs` removes a program and those it imports from the selection.

    Each case starts from a fresh filter.
    """
    cases = (
        # The program collatz_print.py is self-contained. Imparting it
        # excludes it from the selection, but has no other effect.
        ("collatz_print.py", {"assignment.py", "fizzbuzz.py", "is_even.py"}),
        # The program fizzbuzz.py imports collatz_print.py. Imparting the
        # former implies that the latter has already been studied, and must be
        # excluded from the selection.
        ("fizzbuzz.py", {"assignment.py", "is_even.py"}),
        # The program is_even.py imports fizzbuzz.py, which imports
        # collatz_print.py. Imparting the former excludes the three of them
        # from the selection.
        ("is_even.py", {"assignment.py"}),
    )
    for program_name, expected in cases:
        program_filter = ProgramFilter(db)
        program_filter.impart_programs({program_name})
        selected = set(program_filter.selected_programs.keys())
        print(selected)
        assert selected == expected
def test_exclude_programs():
    """Check that `exclude_programs(..., follow=True)` removes the given programs.

    The unknown program name in the request must not affect the result.
    """
    program_filter = ProgramFilter(db)
    excluded = {"prg1.py", "prg2.py", "non_existing_program"}
    program_filter.exclude_programs(excluded, follow=True)
    print(sorted(program_filter.selected_programs))

    remaining = set(db["programs"]) - excluded
    assert program_filter.selected_programs == remaining
def test_exclude_programs():
    """Check that `exclude_programs` also removes the programs importing the request.

    Each case starts from a fresh filter.
    """
    cases = (
        # The program collatz_print.py is imported by fizzbuzz.py, and
        # indirectly by is_even.py. Excluding the former excludes the two
        # latter too.
        ("collatz_print.py", {"assignment.py"}),
        # The program fizzbuzz.py is imported by is_even.py. Excluding the
        # former excludes both.
        ("fizzbuzz.py", {"assignment.py", "collatz_print.py"}),
        # The program is_even.py is not imported. Excluding it has no other
        # effect on the selection.
        ("is_even.py", {"assignment.py", "collatz_print.py", "fizzbuzz.py"}),
    )
    for program_name, expected in cases:
        program_filter = ProgramFilter(db)
        program_filter.exclude_programs({program_name})
        selected = set(program_filter.selected_programs.keys())
        print(selected)
        assert selected == expected
def test_init():
    """Check the initial state of a filter: every program selected, nothing imparted."""
    program_filter = ProgramFilter(db)
    print(program_filter.selected_programs)

    all_programs = db["programs"].keys()
    assert program_filter.selected_programs.keys() == all_programs
    assert not program_filter.imparted_knowledge
def test_exclude_taxa():
    """Check the exclusion of the programs featuring a taxon, directly or not.

    Each case starts from a fresh filter, looks up the programs of the taxon
    with `programs_of_taxa`, and excludes them with follow=True.
    """
    cases = (
        # Featured by assignment.py and collatz.py. Indirectly featured by
        # fizzbuzz.py (which imports collatz.py) and by is_even.py (which
        # imports fizzbuzz.py). All four programs are excluded.
        ("var/assignment/explicit/single", set()),
        # Excluding this taxon is expected to keep only assignment.py.
        # NOTE(review): the original comment of this case described
        # "operator/arithmetic/addition" (featured by collatz.py, and
        # indirectly by fizzbuzz.py and is_even.py), whereas the taxon
        # actually used is "call/function/builtin/print". Confirm which was
        # intended.
        ("call/function/builtin/print", {"assignment.py"}),
        # Featured by collatz.py, fizzbuzz.py, and indirectly by is_even.py.
        # Only assignment.py is kept.
        ("flow/conditional", {"assignment.py"}),
        # Featured by fizzbuzz.py, and indirectly by is_even.py. Only
        # assignment.py and collatz.py are kept.
        ("type/sequence/string/literal", {"assignment.py", "collatz.py"}),
    )
    for taxon, expected in cases:
        program_filter = ProgramFilter(db)
        featuring = program_filter.programs_of_taxa({taxon})
        program_filter.exclude_programs(featuring, follow=True)
        print(program_filter.selected_programs)
        assert program_filter.selected_programs == expected
def test_preprocess_programs():
    """Check that `preprocess_programs` expands name patterns into a sorted list of programs."""
    program_filter = ProgramFilter(db)
    patterns = [r"prg[1-3]\.py", r"prg[7-9]\.py", "non_matching_pattern"]
    expanded = program_filter.preprocess_programs(patterns)
    print(sorted(expanded))

    expected = [
        "prg1.py",
        "prg2.py",
        "prg3.py",
        "prg7.py",
        "prg8.py",
        "prg9.py",
    ]
    assert expanded == expected
def test_include_programs():
    """Check that `include_programs` keeps only the existing programs of the request."""
    program_filter = ProgramFilter(db)
    requested = {"prg1.py", "prg2.py", "non_existing_program"}
    program_filter.include_programs(requested)
    print(sorted(program_filter.selected_programs))

    # The unknown name is silently left out of the selection.
    expected = {"prg1.py", "prg2.py"}
    assert program_filter.selected_programs.keys() == expected
def test_exclude_taxons():
    """Check `exclude_taxons` when it is given (predicate, taxon, taxon) triples.

    Each case starts from a fresh filter. The last case deliberately passes
    its triple in a list rather than in a set.
    """
    cases = (
        # The taxon "variable/assignment/single" is featured by assignment.py
        # and collatz_print.py. In collatz_print.py, it appears after a taxon
        # "io/standard/print". Consequently, this program should be excluded
        # from the results, along with the programs which import it:
        # fizzbuzz.py and is_even.py.
        (
            {("after", "variable/assignment/single", "io/standard/print")},
            {"assignment.py"},
        ),
        # "operator/arithmetic/addition" and
        # "operator/arithmetic/multiplication" are both featured on the same
        # line of collatz_print.py, and indirectly by fizzbuzz.py and
        # is_even.py. Therefore, excluding this triple keeps only
        # assignment.py.
        (
            {("equals", "operator/arithmetic/addition", "operator/arithmetic/multiplication")},
            {"assignment.py"},
        ),
        # "test/equality" is inside "subroutine/function" in is_even.py, which
        # is not imported anywhere.
        (
            {("inside", "test/equality", "subroutine/function")},
            {"collatz_print.py", "assignment.py", "fizzbuzz.py"},
        ),
        # "call/function/builtin/range" is not inside "flow/conditional"
        # anywhere.
        (
            [("inside", "call/function/builtin/range", "flow/conditional")],
            {"is_even.py", "fizzbuzz.py", "assignment.py", "collatz_print.py"},
        ),
    )
    for triples, expected in cases:
        program_filter = ProgramFilter(db)
        program_filter.exclude_taxons(triples)
        selected = set(program_filter.selected_programs.keys())
        print(selected)
        assert selected == expected