Ejemplo n.º 1
0
def test_modules():
    """Reset the registry and declare a small tree of "Atype" modules.

    Four modules are registered under a local base type:

    * ``AParent`` depends on ``AFoo`` (passing
      ``default_config_overrides={"changethis": 42}``) and on ``ABar``.
    * ``AFoo`` depends on ``AFooBar``.
    * ``ABar`` and ``AFooBar`` declare no dependencies.

    Returns:
        A ``(ModuleTypeA, AParent)`` tuple: the local base type and the
        module at the root of the dependency tree.
    """
    module_registry.reset()

    def _test_option(key):
        # Most options below differ only by key; build them in one place.
        return ConfigOption(key=key, default_value="val1", description="test option")

    class ModuleTypeA(ModuleBase):
        module_type = "Atype"

    # Root module: one option of its own plus two direct dependencies.
    @ModuleTypeA.register
    class AParent(ModuleTypeA):
        module_name = "AParent"
        config_spec = [_test_option("key1")]
        dependencies = [
            Dependency(
                key="myfoo",
                module="Atype",
                name="AFoo",
                default_config_overrides={"changethis": 42},
            ),
            Dependency(key="bar", module="Atype", name="ABar"),
        ]

    # Middle module: declares the "changethis" option that AParent targets.
    @ModuleTypeA.register
    class AFoo(ModuleTypeA):
        module_name = "AFoo"
        config_spec = [
            _test_option("foo1"),
            ConfigOption(
                key="changethis",
                default_value=0,
                description="something to override",
            ),
        ]
        dependencies = [
            Dependency(key="myfoobar", module="Atype", name="AFooBar"),
        ]

    # Leaf modules: a single option each and no dependencies.
    @ModuleTypeA.register
    class ABar(ModuleTypeA):
        module_name = "ABar"
        config_spec = [_test_option("bar1")]

    @ModuleTypeA.register
    class AFooBar(ModuleTypeA):
        module_name = "AFooBar"
        config_spec = [_test_option("foobar1")]

    return ModuleTypeA, AParent
Ejemplo n.º 2
0
def test_types():
    """Check the types of config values produced from ``ConfigOption`` specs.

    A module is declared whose options mix defaults of one type with an
    explicit ``value_type`` of another (for example ``default_value=9`` with
    ``value_type=str``), plus the list-like ``"strlist"``, ``"intlist"`` and
    ``"floatlist"`` value types. The assertions then verify the type or value
    found in ``foo.config`` for each key, and finally that a ``None`` default
    can be replaced by a string passed to the constructor.
    """
    module_registry.reset()

    class ModuleFoo(ModuleBase):
        module_type = "Atype"
        module_name = "foo"
        config_spec = [
            ConfigOption(key="str1", default_value="foo"),
            ConfigOption(key="str2", default_value=9, value_type=str),
            ConfigOption(key="int1", default_value=2),
            ConfigOption(key="int2", default_value="3", value_type=int),
            ConfigOption(key="float1", default_value=2.2),
            ConfigOption(key="float2", default_value="3.3", value_type=float),
            ConfigOption(key="bool1", default_value=False),
            ConfigOption(key="bool2", default_value="false", value_type=bool),
            ConfigOption(key="bool3", default_value="true", value_type=bool),
            ConfigOption(key="strlist1", default_value=3, value_type="strlist"),
            ConfigOption(key="strlist2", default_value=[4, 5], value_type="strlist"),
            ConfigOption(key="strlist3", default_value="4,5", value_type="strlist"),
            ConfigOption(key="intlist1", default_value=3, value_type="intlist"),
            ConfigOption(key="intlist2", default_value="3", value_type="intlist"),
            ConfigOption(key="intlist3", default_value=(4, 5), value_type="intlist"),
            ConfigOption(key="intlist4", default_value="4,5", value_type="intlist"),
            ConfigOption(key="floatlist1", default_value=3, value_type="floatlist"),
            ConfigOption(key="none-or-str", default_value=None),
        ]

    foo = ModuleFoo()

    # Exact type identity is intended here (isinstance would also accept
    # subclasses, e.g. a bool where an int is expected), so compare with `is`.
    assert type(foo.config["str1"]) is str
    assert type(foo.config["str2"]) is str
    assert type(foo.config["int1"]) is int
    assert type(foo.config["int2"]) is int
    assert type(foo.config["float1"]) is float
    assert type(foo.config["float2"]) is float

    # None is the only instance of NoneType, so an identity check is equivalent.
    assert foo.config["none-or-str"] is None

    assert foo.config["bool1"] is False
    assert foo.config["bool2"] is False
    assert foo.config["bool3"] is True

    # List-typed options are compared against tuples of converted elements.
    assert foo.config["strlist1"] == ("3",)
    assert foo.config["strlist2"] == ("4", "5")
    assert foo.config["strlist3"] == ("4", "5")
    assert foo.config["intlist1"] == (3,)
    assert foo.config["intlist2"] == (3,)
    assert foo.config["intlist3"] == (4, 5)
    assert foo.config["intlist4"] == (4, 5)
    assert foo.config["floatlist1"] == (3.0,)

    # An option whose default is None takes a string supplied at construction.
    foo = ModuleFoo({"none-or-str": "str"})
    assert type(foo.config["none-or-str"]) is str
    assert foo.config["none-or-str"] == "str"
Ejemplo n.º 3
0
def rank_modules():
    """Reset global state and register a set of ranking-related modules.

    After resetting ``module_registry`` and ``constants``, this declares and
    registers three task modules plus the benchmark, searcher, index and
    collection modules that their dependencies name.

    Returns:
        ``[ThreeRankTask, TwoRankTask, RankTask]``, the registered task classes.
    """
    module_registry.reset()
    constants.reset()

    def _benchmark_dependency():
        # Identical benchmark declaration shared by several tasks; a new
        # Dependency object is built for every use.
        return Dependency(
            key="benchmark",
            module="benchmark",
            name="rob04yang",
            provide_this=True,
            provide_children=["collection"],
        )

    def _collection_dependency(name):
        # "collection" dependency on the collection module called `name`.
        return Dependency(key="collection", module="collection", name=name)

    class Task(ModuleBase):
        module_type = "task"
        requires_random_seed = True

    @Task.register
    class ThreeRankTask(Task):
        """A strange rank task that runs two searchers on benchmark #1
        (via TwoRank) and the third searcher on benchmark #2."""

        module_name = "threerank"
        dependencies = [
            Dependency(key="tworank", module="task", name="tworank"),
            Dependency(key="rank3", module="task", name="rank"),
        ]

    @Task.register
    class TwoRankTask(Task):
        """A rank task that runs two searchers on the same benchmark."""

        module_name = "tworank"
        dependencies = [
            _benchmark_dependency(),
            Dependency(key="rank1a", module="task", name="rank"),
            Dependency(key="rank1b", module="task", name="rank"),
        ]

    @Task.register
    class RankTask(Task):
        module_name = "rank"
        dependencies = [
            _benchmark_dependency(),
            Dependency(key="searcher", module="searcher", name="bm25"),
        ]

    # --- benchmarks ---------------------------------------------------------
    @ModuleBase.register
    class BenchmarkRob04(ModuleBase):
        module_type = "benchmark"
        module_name = "rob04yang"
        dependencies = [_collection_dependency("robust04")]

    @ModuleBase.register
    class BenchmarkTRECDL(ModuleBase):
        module_type = "benchmark"
        module_name = "trecdl"
        dependencies = [_collection_dependency("msmarco")]

    # --- searcher and index -------------------------------------------------
    @ModuleBase.register
    class SearcherBM25(ModuleBase):
        module_type = "searcher"
        module_name = "bm25"
        dependencies = [
            Dependency(key="index", module="index", name="anserini"),
        ]
        config_spec = [
            ConfigOption(key="k1", default_value=1.0, description="k1 parameter"),
        ]
        # Searchers are unlikely to actually need a seed, but we require it for testing
        requires_random_seed = True

    @ModuleBase.register
    class IndexAnserini(ModuleBase):
        module_type = "index"
        module_name = "anserini"
        dependencies = [_collection_dependency("robust04")]
        config_spec = [
            ConfigOption(key="stemmer", default_value="porter", description="stemming"),
        ]

    # --- collections --------------------------------------------------------
    @ModuleBase.register
    class CollectionRobust04(ModuleBase):
        module_type = "collection"
        module_name = "robust04"

    @ModuleBase.register
    class CollectionMSMARCO(ModuleBase):
        module_type = "collection"
        module_name = "msmarco"

    return [ThreeRankTask, TwoRankTask, RankTask]
Ejemplo n.º 4
0
def rank_modules():
    """Reset global state and register ranking and reranking modules.

    After resetting ``module_registry`` and ``constants``, this declares and
    registers four task modules together with the benchmark, searcher, index,
    collection, extractor, tokenizer, trainer and reranker modules that their
    dependencies name.

    Returns:
        ``[ThreeRankTask, TwoRankTask, RankTask, RerankTask]``, the registered
        task classes.
    """
    module_registry.reset()
    constants.reset()

    def _benchmark_dependency():
        # Identical benchmark declaration shared by several tasks; a new
        # Dependency object is built for every use.
        return Dependency(
            key="benchmark",
            module="benchmark",
            name="rob04yang",
            provide_this=True,
            provide_children=["collection"],
        )

    def _collection_dependency(name):
        # "collection" dependency on the collection module called `name`.
        return Dependency(key="collection", module="collection", name=name)

    class Task(ModuleBase):
        module_type = "task"
        requires_random_seed = True

    # --- tasks --------------------------------------------------------------
    @Task.register
    class ThreeRankTask(Task):
        """A strange rank task that runs two searchers on benchmark #1
        (via TwoRank) and the third searcher on benchmark #2."""

        module_name = "threerank"
        dependencies = [
            Dependency(key="tworank", module="task", name="tworank"),
            Dependency(key="rank3", module="task", name="rank"),
        ]

    @Task.register
    class TwoRankTask(Task):
        """A rank task that runs two searchers on the same benchmark."""

        module_name = "tworank"
        dependencies = [
            _benchmark_dependency(),
            Dependency(key="rank1a", module="task", name="rank"),
            Dependency(key="rank1b", module="task", name="rank"),
        ]

    @Task.register
    class RankTask(Task):
        module_name = "rank"
        dependencies = [
            _benchmark_dependency(),
            Dependency(key="searcher", module="searcher", name="bm25"),
        ]

    @Task.register
    class RerankTask(Task):
        module_name = "rerank"
        config_spec = [
            ConfigOption("fold", "s1", "fold to run"),
            # affects train() because we check to save weights
            ConfigOption("optimize", "map", "metric to maximize on the dev set"),
        ]
        dependencies = [
            _benchmark_dependency(),
            Dependency(key="rank", module="task", name="rank"),
            Dependency(key="reranker", module="reranker", name="DRMM"),
        ]

    # --- benchmarks ---------------------------------------------------------
    @ModuleBase.register
    class BenchmarkRob04(ModuleBase):
        module_type = "benchmark"
        module_name = "rob04yang"
        dependencies = [_collection_dependency("robust04")]

    @ModuleBase.register
    class BenchmarkTRECDL(ModuleBase):
        module_type = "benchmark"
        module_name = "trecdl"
        dependencies = [_collection_dependency("msmarco")]

    # --- searcher and index -------------------------------------------------
    @ModuleBase.register
    class SearcherBM25(ModuleBase):
        module_type = "searcher"
        module_name = "bm25"
        dependencies = [
            Dependency(key="index", module="index", name="anserini"),
        ]
        config_spec = [
            ConfigOption(key="k1", default_value=1.0, description="k1 parameter"),
        ]
        # Searchers are unlikely to actually need a seed, but we require it for testing
        requires_random_seed = True

    @ModuleBase.register
    class IndexAnserini(ModuleBase):
        module_type = "index"
        module_name = "anserini"
        dependencies = [_collection_dependency("robust04")]
        config_spec = [
            ConfigOption(key="stemmer", default_value="porter", description="stemming"),
        ]

    # --- collections --------------------------------------------------------
    @ModuleBase.register
    class CollectionRobust04(ModuleBase):
        module_type = "collection"
        module_name = "robust04"

    @ModuleBase.register
    class CollectionMSMARCO(ModuleBase):
        module_type = "collection"
        module_name = "msmarco"

    # --- reranking stack: extractor, tokenizer, trainer, reranker -----------
    @ModuleBase.register
    class ExtractorEmbedtext(ModuleBase):
        module_type = "extractor"
        module_name = "embedtext"

        dependencies = [
            # Same index module as the searcher uses, but declared with a
            # "stemmer" override of "none".
            Dependency(
                key="index",
                module="index",
                name="anserini",
                default_config_overrides={"stemmer": "none"},
            ),
            Dependency(key="tokenizer", module="tokenizer", name="anserini"),
        ]
        config_spec = [
            ConfigOption("embeddings", "glove6b"),
            ConfigOption("zerounk", False),
            ConfigOption("calcidf", True),
            ConfigOption("maxqlen", 4),
            ConfigOption("maxdoclen", 800),
            ConfigOption("usecache", False),
        ]

    @ModuleBase.register
    class TokenizerAnserini(ModuleBase):
        module_type = "tokenizer"
        module_name = "anserini"
        config_spec = [
            ConfigOption("keepstops", True, "keep stopwords if True"),
            ConfigOption("stemmer", "none", "stemmer: porter, krovetz, or none"),
        ]

    @ModuleBase.register
    class TrainerPytorch(ModuleBase):
        module_type = "trainer"
        module_name = "pytorch"
        config_spec = [
            ConfigOption("batch", 32, "batch size"),
            ConfigOption("niters", 20),
            ConfigOption("itersize", 512),
            ConfigOption("gradacc", 1),
            ConfigOption("lr", 0.001),
            ConfigOption("softmaxloss", False),
            ConfigOption("fastforward", False),
            ConfigOption("validatefreq", 1),
            ConfigOption("boardname", "default"),
        ]
        config_keys_not_in_path = ["fastforward", "boardname"]

    @ModuleBase.register
    class RerankerDRMM(ModuleBase):
        module_type = "reranker"
        module_name = "DRMM"
        dependencies = [
            Dependency(key="extractor", module="extractor", name="embedtext"),
            Dependency(key="trainer", module="trainer", name="pytorch"),
        ]
        config_spec = [
            ConfigOption("nbins", 29, "number of bins in matching histogram"),
            ConfigOption("nodes", 5, "hidden layer dimension for matching network"),
            ConfigOption("histType", "LCH", "histogram type: CH, NH, LCH"),
            ConfigOption("gateType", "IDF", "term gate type: TV or IDF"),
        ]

    return [ThreeRankTask, TwoRankTask, RankTask, RerankTask]