def test_modules():
    """Define and register a small tree of ``"Atype"`` modules for testing.

    ``module_registry.reset()`` is called first. The modules declared are:

    * ``AParent`` -- depends on ``AFoo`` (key ``myfoo``, with a default config
      override setting ``changethis`` to 42) and on ``ABar`` (key ``bar``).
    * ``AFoo`` -- depends on ``AFooBar`` (key ``myfoobar``).
    * ``ABar`` and ``AFooBar`` -- no dependencies.

    Returns:
        The tuple ``(ModuleTypeA, AParent)``.
    """
    module_registry.reset()

    def val1_option(key):
        # Every module below carries one option of this exact shape; only the key differs.
        return ConfigOption(key=key, default_value="val1", description="test option")

    class ModuleTypeA(ModuleBase):
        module_type = "Atype"

    @ModuleTypeA.register
    class AParent(ModuleTypeA):
        module_name = "AParent"
        config_spec = [val1_option("key1")]
        dependencies = [
            Dependency(
                key="myfoo",
                module="Atype",
                name="AFoo",
                default_config_overrides={"changethis": 42},
            ),
            Dependency(key="bar", module="Atype", name="ABar"),
        ]

    @ModuleTypeA.register
    class AFoo(ModuleTypeA):
        module_name = "AFoo"
        config_spec = [
            val1_option("foo1"),
            # AParent overrides this option's default (0) with 42 via its "myfoo" dependency.
            ConfigOption(key="changethis", default_value=0, description="something to override"),
        ]
        dependencies = [Dependency(key="myfoobar", module="Atype", name="AFooBar")]

    @ModuleTypeA.register
    class ABar(ModuleTypeA):
        module_name = "ABar"
        config_spec = [val1_option("bar1")]

    @ModuleTypeA.register
    class AFooBar(ModuleTypeA):
        module_name = "AFooBar"
        config_spec = [val1_option("foobar1")]

    return ModuleTypeA, AParent
def test_types():
    """Check the types and values a module exposes through ``config``.

    A module is declared whose options pair a default value with either an
    implicit type (no ``value_type``) or an explicit ``value_type`` that differs
    from the default's own type (e.g. ``default_value=9, value_type=str``). The
    test then asserts the type or value found in ``foo.config`` for each key,
    and finally that an option whose default is ``None`` holds the string passed
    in at construction time.
    """
    module_registry.reset()

    class ModuleFoo(ModuleBase):
        module_type = "Atype"
        module_name = "foo"
        config_spec = [
            ConfigOption(key="str1", default_value="foo"),
            ConfigOption(key="str2", default_value=9, value_type=str),
            ConfigOption(key="int1", default_value=2),
            ConfigOption(key="int2", default_value="3", value_type=int),
            ConfigOption(key="float1", default_value=2.2),
            ConfigOption(key="float2", default_value="3.3", value_type=float),
            ConfigOption(key="bool1", default_value=False),
            ConfigOption(key="bool2", default_value="false", value_type=bool),
            ConfigOption(key="bool3", default_value="true", value_type=bool),
            ConfigOption(key="strlist1", default_value=3, value_type="strlist"),
            ConfigOption(key="strlist2", default_value=[4, 5], value_type="strlist"),
            ConfigOption(key="strlist3", default_value="4,5", value_type="strlist"),
            ConfigOption(key="intlist1", default_value=3, value_type="intlist"),
            ConfigOption(key="intlist2", default_value="3", value_type="intlist"),
            ConfigOption(key="intlist3", default_value=(4, 5), value_type="intlist"),
            ConfigOption(key="intlist4", default_value="4,5", value_type="intlist"),
            ConfigOption(key="floatlist1", default_value=3, value_type="floatlist"),
            ConfigOption(key="none-or-str", default_value=None),
        ]

    foo = ModuleFoo()

    # Exact type identity (not isinstance) is deliberate: isinstance would let a
    # bool satisfy the int checks, since bool is a subclass of int.
    expected_types = {
        "str1": str,
        "str2": str,
        "int1": int,
        "int2": int,
        "float1": float,
        "float2": float,
    }
    for key, expected_type in expected_types.items():
        assert type(foo.config[key]) is expected_type, key

    assert foo.config["none-or-str"] is None

    # Booleans are compared by identity so that 0/1 or "false"/"true" cannot pass.
    assert foo.config["bool1"] is False
    assert foo.config["bool2"] is False
    assert foo.config["bool3"] is True

    # List-typed options are expected as tuples of the element type.
    expected_sequences = {
        "strlist1": ("3",),
        "strlist2": ("4", "5"),
        "strlist3": ("4", "5"),
        "intlist1": (3,),
        "intlist2": (3,),
        "intlist3": (4, 5),
        "intlist4": (4, 5),
        "floatlist1": (3.0,),
    }
    for key, expected_value in expected_sequences.items():
        assert foo.config[key] == expected_value, key

    # An option whose default is None holds the string supplied at construction.
    foo = ModuleFoo({"none-or-str": "str"})
    assert type(foo.config["none-or-str"]) is str
    assert foo.config["none-or-str"] == "str"
def rank_modules():
    """Register a set of rank-style task modules and the modules they depend on.

    Calls ``module_registry.reset()`` and ``constants.reset()``, then declares
    three task modules (``threerank``, ``tworank``, ``rank``) together with the
    benchmark, searcher, index and collection modules named in their
    dependencies.

    NOTE(review): a second ``rank_modules`` is defined further down in this
    file; if both really live in the same module, the later definition replaces
    this one -- confirm which is intended.

    Returns:
        The list ``[ThreeRankTask, TwoRankTask, RankTask]``.
    """
    module_registry.reset()
    constants.reset()

    def rob04_benchmark():
        # Build a new Dependency (and a new provide_children list) on every call
        # so that the task classes never share one object.
        return Dependency(
            key="benchmark",
            module="benchmark",
            name="rob04yang",
            provide_this=True,
            provide_children=["collection"],
        )

    class Task(ModuleBase):
        module_type = "task"
        requires_random_seed = True

    @Task.register
    class ThreeRankTask(Task):
        """A strange rank task that runs two searchers on benchmark #1 (via TwoRank)
        and the third searcher on benchmark #2."""

        module_name = "threerank"
        dependencies = [
            Dependency(key="tworank", module="task", name="tworank"),
            Dependency(key="rank3", module="task", name="rank"),
        ]

    @Task.register
    class TwoRankTask(Task):
        """A rank task that runs two searchers on the same benchmark."""

        module_name = "tworank"
        dependencies = [
            rob04_benchmark(),
            Dependency(key="rank1a", module="task", name="rank"),
            Dependency(key="rank1b", module="task", name="rank"),
        ]

    @Task.register
    class RankTask(Task):
        module_name = "rank"
        dependencies = [
            rob04_benchmark(),
            Dependency(key="searcher", module="searcher", name="bm25"),
        ]

    @ModuleBase.register
    class BenchmarkRob04(ModuleBase):
        module_type = "benchmark"
        module_name = "rob04yang"
        dependencies = [Dependency(key="collection", module="collection", name="robust04")]

    @ModuleBase.register
    class BenchmarkTRECDL(ModuleBase):
        module_type = "benchmark"
        module_name = "trecdl"
        dependencies = [Dependency(key="collection", module="collection", name="msmarco")]

    @ModuleBase.register
    class SearcherBM25(ModuleBase):
        module_type = "searcher"
        module_name = "bm25"
        dependencies = [Dependency(key="index", module="index", name="anserini")]
        config_spec = [ConfigOption(key="k1", default_value=1.0, description="k1 parameter")]
        # Searchers are unlikely to actually need a seed, but we require it for testing
        requires_random_seed = True

    @ModuleBase.register
    class IndexAnserini(ModuleBase):
        module_type = "index"
        module_name = "anserini"
        dependencies = [Dependency(key="collection", module="collection", name="robust04")]
        config_spec = [
            ConfigOption(key="stemmer", default_value="porter", description="stemming"),
        ]

    @ModuleBase.register
    class CollectionRobust04(ModuleBase):
        module_type = "collection"
        module_name = "robust04"

    @ModuleBase.register
    class CollectionMSMARCO(ModuleBase):
        module_type = "collection"
        module_name = "msmarco"

    return [ThreeRankTask, TwoRankTask, RankTask]
def rank_modules():
    """Register rank and rerank task modules and the modules they depend on.

    Calls ``module_registry.reset()`` and ``constants.reset()``, then declares
    four task modules (``threerank``, ``tworank``, ``rank``, ``rerank``) together
    with the benchmark, searcher, index, collection, extractor, tokenizer,
    trainer and reranker modules named in their dependencies.

    NOTE(review): an earlier ``rank_modules`` is defined above in this file; if
    both really live in the same module, this definition replaces the earlier
    one -- confirm that is intended.

    Returns:
        The list ``[ThreeRankTask, TwoRankTask, RankTask, RerankTask]``.
    """
    module_registry.reset()
    constants.reset()

    def rob04_benchmark():
        # Build a new Dependency (and a new provide_children list) on every call
        # so that the task classes never share one object.
        return Dependency(
            key="benchmark",
            module="benchmark",
            name="rob04yang",
            provide_this=True,
            provide_children=["collection"],
        )

    class Task(ModuleBase):
        module_type = "task"
        requires_random_seed = True

    @Task.register
    class ThreeRankTask(Task):
        """A strange rank task that runs two searchers on benchmark #1 (via TwoRank)
        and the third searcher on benchmark #2."""

        module_name = "threerank"
        dependencies = [
            Dependency(key="tworank", module="task", name="tworank"),
            Dependency(key="rank3", module="task", name="rank"),
        ]

    @Task.register
    class TwoRankTask(Task):
        """A rank task that runs two searchers on the same benchmark."""

        module_name = "tworank"
        dependencies = [
            rob04_benchmark(),
            Dependency(key="rank1a", module="task", name="rank"),
            Dependency(key="rank1b", module="task", name="rank"),
        ]

    @Task.register
    class RankTask(Task):
        module_name = "rank"
        dependencies = [
            rob04_benchmark(),
            Dependency(key="searcher", module="searcher", name="bm25"),
        ]

    @Task.register
    class RerankTask(Task):
        module_name = "rerank"
        config_spec = [
            ConfigOption("fold", "s1", "fold to run"),
            # affects train() because we check to save weights
            ConfigOption("optimize", "map", "metric to maximize on the dev set"),
        ]
        dependencies = [
            rob04_benchmark(),
            Dependency(key="rank", module="task", name="rank"),
            Dependency(key="reranker", module="reranker", name="DRMM"),
        ]

    @ModuleBase.register
    class BenchmarkRob04(ModuleBase):
        module_type = "benchmark"
        module_name = "rob04yang"
        dependencies = [Dependency(key="collection", module="collection", name="robust04")]

    @ModuleBase.register
    class BenchmarkTRECDL(ModuleBase):
        module_type = "benchmark"
        module_name = "trecdl"
        dependencies = [Dependency(key="collection", module="collection", name="msmarco")]

    @ModuleBase.register
    class SearcherBM25(ModuleBase):
        module_type = "searcher"
        module_name = "bm25"
        dependencies = [Dependency(key="index", module="index", name="anserini")]
        config_spec = [ConfigOption(key="k1", default_value=1.0, description="k1 parameter")]
        # Searchers are unlikely to actually need a seed, but we require it for testing
        requires_random_seed = True

    @ModuleBase.register
    class IndexAnserini(ModuleBase):
        module_type = "index"
        module_name = "anserini"
        dependencies = [Dependency(key="collection", module="collection", name="robust04")]
        config_spec = [
            ConfigOption(key="stemmer", default_value="porter", description="stemming"),
        ]

    @ModuleBase.register
    class CollectionRobust04(ModuleBase):
        module_type = "collection"
        module_name = "robust04"

    @ModuleBase.register
    class CollectionMSMARCO(ModuleBase):
        module_type = "collection"
        module_name = "msmarco"

    @ModuleBase.register
    class ExtractorEmbedtext(ModuleBase):
        module_type = "extractor"
        module_name = "embedtext"
        dependencies = [
            Dependency(
                key="index",
                module="index",
                name="anserini",
                default_config_overrides={"stemmer": "none"},
            ),
            Dependency(key="tokenizer", module="tokenizer", name="anserini"),
        ]
        config_spec = [
            ConfigOption("embeddings", "glove6b"),
            ConfigOption("zerounk", False),
            ConfigOption("calcidf", True),
            ConfigOption("maxqlen", 4),
            ConfigOption("maxdoclen", 800),
            ConfigOption("usecache", False),
        ]

    @ModuleBase.register
    class TokenizerAnserini(ModuleBase):
        module_type = "tokenizer"
        module_name = "anserini"
        config_spec = [
            ConfigOption("keepstops", True, "keep stopwords if True"),
            ConfigOption("stemmer", "none", "stemmer: porter, krovetz, or none"),
        ]

    @ModuleBase.register
    class TrainerPytorch(ModuleBase):
        module_type = "trainer"
        module_name = "pytorch"
        config_spec = [
            ConfigOption("batch", 32, "batch size"),
            ConfigOption("niters", 20),
            ConfigOption("itersize", 512),
            ConfigOption("gradacc", 1),
            ConfigOption("lr", 0.001),
            ConfigOption("softmaxloss", False),
            ConfigOption("fastforward", False),
            ConfigOption("validatefreq", 1),
            ConfigOption("boardname", "default"),
        ]
        config_keys_not_in_path = ["fastforward", "boardname"]

    @ModuleBase.register
    class RerankerDRMM(ModuleBase):
        module_type = "reranker"
        module_name = "DRMM"
        dependencies = [
            Dependency(key="extractor", module="extractor", name="embedtext"),
            Dependency(key="trainer", module="trainer", name="pytorch"),
        ]
        config_spec = [
            ConfigOption("nbins", 29, "number of bins in matching histogram"),
            ConfigOption("nodes", 5, "hidden layer dimension for matching network"),
            ConfigOption("histType", "LCH", "histogram type: CH, NH, LCH"),
            ConfigOption("gateType", "IDF", "term gate type: TV or IDF"),
        ]

    return [ThreeRankTask, TwoRankTask, RankTask, RerankTask]