Beispiel #1
0
def create_cord19_application_package():
    """Assemble and return the ``cord19`` application package.

    The schema receives an ``id`` field and a ``title`` field indexed with
    ``enable-bm25``, a ``default`` field set over ``title`` and a ``bm25``
    rank profile.  A ``BertModelConfig`` is then registered through
    ``add_model_ranking`` with ``bm25(title)`` as first phase and a
    ``SecondPhaseRanking`` on the ``logit1`` expression.

    Returns:
        The configured ``ApplicationPackage`` instance.
    """
    package = ApplicationPackage(name="cord19")
    schema = package.schema

    # Document fields.
    id_field = Field(name="id", type="string", indexing=["attribute", "summary"])
    title_field = Field(
        name="title",
        type="string",
        indexing=["index", "summary"],
        index="enable-bm25",
    )
    schema.add_fields(id_field, title_field)

    # Field set and baseline rank profile.
    schema.add_field_set(FieldSet(name="default", fields=["title"]))
    schema.add_rank_profile(RankProfile(name="bm25", first_phase="bm25(title)"))

    # The same identifier is passed for both the tokenizer and the model.
    bert_name = "google/bert_uncased_L-2_H-128_A-2"
    bert_settings = BertModelConfig(
        model_id="pretrained_bert_tiny",
        tokenizer=bert_name,
        model=bert_name,
        query_input_size=5,
        doc_input_size=10,
    )
    reranking = SecondPhaseRanking(rerank_count=10, expression="logit1")
    package.add_model_ranking(
        model_config=bert_settings,
        include_model_summary_features=True,
        inherits="default",
        first_phase="bm25(title)",
        second_phase=reranking,
    )
    return package
Beispiel #2
0
 def setUp(self) -> None:
     """Create the ``BertModelConfig`` stored on ``self.model_config``.

     The tokenizer argument is the ``bert_tiny_tokenizer`` entry inside the
     directory named by the ``RESOURCES_DIR`` environment variable.
     """
     tokenizer_path = os.path.join(
         os.environ["RESOURCES_DIR"], "bert_tiny_tokenizer"
     )
     self.model_config = BertModelConfig(
         model_id="bert_tiny",
         query_input_size=4,
         doc_input_size=8,
         tokenizer=tokenizer_path,
     )
Beispiel #3
0
    def setUp(self) -> None:
        """Build the ``test_app`` application package on ``self.app_package``.

        The package gets ``id``, ``title`` and ``body`` fields, a ``default``
        field set, ``default`` and ``bm25`` rank profiles, one query profile
        type field, two query profile fields, and a BERT model ranking whose
        tokenizer and model paths live under the ``RESOURCES_DIR``
        environment variable.
        """
        package = ApplicationPackage(name="test_app")
        self.app_package = package
        schema = package.schema

        # "title" and "body" share the same BM25-enabled text configuration.
        id_field = Field(name="id", type="string", indexing=["attribute", "summary"])
        text_fields = [
            Field(
                name=field_name,
                type="string",
                indexing=["index", "summary"],
                index="enable-bm25",
            )
            for field_name in ("title", "body")
        ]
        schema.add_fields(id_field, *text_fields)
        schema.add_field_set(FieldSet(name="default", fields=["title", "body"]))

        # Rank profiles: native rank baseline plus a BM25 profile inheriting it.
        native_profile = RankProfile(
            name="default", first_phase="nativeRank(title, body)"
        )
        schema.add_rank_profile(native_profile)
        bm25_profile = RankProfile(
            name="bm25",
            first_phase="bm25(title) + bm25(body)",
            inherits="default",
        )
        schema.add_rank_profile(bm25_profile)

        # Query profile type and query profile entries.
        query_tensor = QueryTypeField(
            name="ranking.features.query(query_bert)",
            type="tensor<float>(x[768])",
        )
        package.query_profile_type.add_fields(query_tensor)
        package.query_profile.add_fields(
            QueryField(name="maxHits", value=100),
            QueryField(name="anotherField", value="string_value"),
        )

        # BERT model configuration backed by local resources.
        resources_dir = os.environ["RESOURCES_DIR"]
        bert_settings = BertModelConfig(
            model_id="bert_tiny",
            query_input_size=4,
            doc_input_size=8,
            tokenizer=os.path.join(resources_dir, "bert_tiny_tokenizer"),
            model=os.path.join(resources_dir, "bert_tiny_model"),
        )

        reranking = SecondPhaseRanking(rerank_count=10, expression="logit1")
        package.add_model_ranking(
            model_config=bert_settings,
            include_model_summary_features=True,
            inherits="default",
            first_phase="bm25(title)",
            second_phase=reranking,
        )
Beispiel #4
0
 def setUp(self) -> None:
     """Build the ``cord19`` application package and deploy it via VespaCloud.

     Two environment variables are required:

     * ``VESPA_CLOUD_USER_KEY`` -- the key content; two-character
       backslash-n sequences in it are converted to real newlines.
     * ``WORK_DIR`` -- directory whose ``sample_application`` entry is used
       as the deployment disk folder.

     Sets ``self.app_package``, ``self.bert_config``, ``self.vespa_cloud``,
     ``self.disk_folder``, ``self.instance_name`` and ``self.app``.

     Raises:
         KeyError: If either environment variable is unset.
     """
     #
     # Resolve required environment up front
     #
     # Reading through os.environ makes a missing variable fail immediately
     # with a KeyError naming it, rather than with an AttributeError or
     # TypeError on None after the package and model config were built.
     key_content = os.environ["VESPA_CLOUD_USER_KEY"].replace(r"\n", "\n")
     work_dir = os.environ["WORK_DIR"]
     #
     # Create application package
     #
     self.app_package = ApplicationPackage(name="cord19")
     self.app_package.schema.add_fields(
         Field(name="cord_uid", type="string", indexing=["attribute", "summary"]),
         Field(
             name="title",
             type="string",
             indexing=["index", "summary"],
             index="enable-bm25",
         ),
     )
     self.app_package.schema.add_field_set(
         FieldSet(name="default", fields=["title"])
     )
     self.app_package.schema.add_rank_profile(
         RankProfile(name="bm25", first_phase="bm25(title)")
     )
     self.bert_config = BertModelConfig(
         model_id="pretrained_bert_tiny",
         tokenizer="google/bert_uncased_L-2_H-128_A-2",
         model="google/bert_uncased_L-2_H-128_A-2",
         query_input_size=5,
         doc_input_size=10,
     )
     self.app_package.add_model_ranking(
         model_config=self.bert_config,
         include_model_summary_features=True,
         inherits="default",
         first_phase="bm25(title)",
         second_phase=SecondPhaseRanking(rerank_count=10, expression="logit1"),
     )
     #
     # Deploy on Vespa Cloud
     #
     self.vespa_cloud = VespaCloud(
         tenant="vespa-team",
         application="pyvespa-integration",
         key_content=key_content,
         application_package=self.app_package,
     )
     self.disk_folder = os.path.join(work_dir, "sample_application")
     self.instance_name = "test"
     self.app = self.vespa_cloud.deploy(
         instance=self.instance_name, disk_folder=self.disk_folder
     )
Beispiel #5
0
 def setUp(self) -> None:
     """Build the ``cord19`` application package and deploy it via VespaDocker.

     The ``WORK_DIR`` environment variable is required; its
     ``sample_application`` entry is used as the deployment disk folder.

     Sets ``self.app_package``, ``self.bert_config``, ``self.disk_folder``,
     ``self.vespa_docker`` and ``self.app``.

     Raises:
         KeyError: If ``WORK_DIR`` is unset.
     """
     #
     # Resolve required environment up front
     #
     # Reading through os.environ makes a missing variable fail immediately
     # with a KeyError naming it, rather than with a TypeError from
     # os.path.join(None, ...) after the package and model config were built.
     work_dir = os.environ["WORK_DIR"]
     #
     # Create application package
     #
     self.app_package = ApplicationPackage(name="cord19")
     self.app_package.schema.add_fields(
         Field(name="cord_uid",
               type="string",
               indexing=["attribute", "summary"]),
         Field(
             name="title",
             type="string",
             indexing=["index", "summary"],
             index="enable-bm25",
         ),
     )
     self.app_package.schema.add_field_set(
         FieldSet(name="default", fields=["title"]))
     self.app_package.schema.add_rank_profile(
         RankProfile(name="bm25", first_phase="bm25(title)"))
     self.bert_config = BertModelConfig(
         model_id="pretrained_bert_tiny",
         tokenizer="google/bert_uncased_L-2_H-128_A-2",
         model="google/bert_uncased_L-2_H-128_A-2",
         query_input_size=5,
         doc_input_size=10,
     )
     self.app_package.add_model_ranking(
         model_config=self.bert_config,
         include_model_summary_features=True,
         inherits="default",
         first_phase="bm25(title)",
         second_phase=SecondPhaseRanking(rerank_count=10,
                                         expression="logit1"),
     )
     #
     # Deploy on a local Docker container
     #
     self.disk_folder = os.path.join(work_dir, "sample_application")
     self.vespa_docker = VespaDocker(port=8089)
     self.app = self.vespa_docker.deploy(
         application_package=self.app_package, disk_folder=self.disk_folder)