コード例 #1
0
ファイル: test_package.py プロジェクト: vdvorak/vespa
 def setUp(self) -> None:
     """Build the ``test_app`` package around a three-field ``msmarco`` schema.

     The schema has one ``default`` fieldset and two rank profiles
     (``default`` and ``bm25``); the package is stored on
     ``self.app_package``.
     """
     id_field = Field(name="id", type="string", indexing=["attribute", "summary"])
     title_field = Field(
         name="title",
         type="string",
         indexing=["index", "summary"],
         index="enable-bm25",
     )
     body_field = Field(
         name="body",
         type="string",
         indexing=["index", "summary"],
         index="enable-bm25",
     )
     default_fieldset = FieldSet(name="default", fields=["title", "body"])
     native_rank_profile = RankProfile(
         name="default", first_phase="nativeRank(title, body)"
     )
     bm25_profile = RankProfile(
         name="bm25",
         first_phase="bm25(title) + bm25(body)",
         inherits="default",
     )
     msmarco = Schema(
         name="msmarco",
         document=Document(fields=[id_field, title_field, body_field]),
         fieldsets=[default_fieldset],
         rank_profiles=[native_rank_profile, bm25_profile],
     )
     self.app_package = ApplicationPackage(name="test_app", schema=msmarco)
コード例 #2
0
 def setUp(self) -> None:
     """Create the ``msmarco`` application package and deploy it on Vespa Cloud.

     Reads the ``VESPA_CLOUD_USER_KEY`` and ``WORK_DIR`` environment
     variables. The deployed application is stored on ``self.app``.

     Raises:
         KeyError: if either environment variable is not set.
     """
     # Index the environment directly so that a missing variable fails with a
     # KeyError naming it, instead of an AttributeError/TypeError on None.
     user_key = os.environ["VESPA_CLOUD_USER_KEY"]
     work_dir = os.environ["WORK_DIR"]
     #
     # Create application package
     #
     document = Document(
         fields=[
             Field(name="id", type="string", indexing=["attribute", "summary"]),
             Field(
                 name="title",
                 type="string",
                 indexing=["index", "summary"],
                 index="enable-bm25",
             ),
             Field(
                 name="body",
                 type="string",
                 indexing=["index", "summary"],
                 index="enable-bm25",
             ),
             Field(
                 name="metadata",
                 type="string",
                 indexing=["attribute", "summary"],
                 attribute=["fast-search", "fast-access"],
             ),
             Field(
                 name="tensor_field",
                 type="tensor<float>(x[128])",
                 indexing=["attribute"],
                 ann=HNSW(
                     distance_metric="euclidean",
                     max_links_per_node=16,
                     neighbors_to_explore_at_insert=200,
                 ),
             ),
         ]
     )
     msmarco_schema = Schema(
         name="msmarco",
         document=document,
         fieldsets=[FieldSet(name="default", fields=["title", "body"])],
         rank_profiles=[
             RankProfile(name="default", first_phase="nativeRank(title, body)")
         ],
     )
     app_package = ApplicationPackage(name="msmarco", schema=msmarco_schema)
     #
     # Deploy on Vespa Cloud
     #
     self.vespa_cloud = VespaCloud(
         tenant="vespa-team",
         application="pyvespa-integration",
         # Turn literal backslash-n sequences in the key into real newlines.
         key_content=user_key.replace(r"\n", "\n"),
         application_package=app_package,
     )
     self.disk_folder = os.path.join(work_dir, "sample_application")
     self.instance_name = "test"
     self.app = self.vespa_cloud.deploy(
         instance=self.instance_name, disk_folder=self.disk_folder
     )
コード例 #3
0
ファイル: test_package.py プロジェクト: vdvorak/vespa
 def test_field_name_type_indexing_index(self):
     """Check attributes, dict round-trip and indexing text of an indexed field."""
     body = Field(
         name="body",
         type="string",
         indexing=["index", "summary"],
         index="enable-bm25",
     )
     expected_dict = {
         "name": "body",
         "type": "string",
         "indexing": ["index", "summary"],
         "index": "enable-bm25",
     }
     # Built independently so the equality check compares two distinct objects.
     twin = Field(
         name="body",
         type="string",
         indexing=["index", "summary"],
         index="enable-bm25",
     )

     self.assertEqual(body.name, "body")
     self.assertEqual(body.type, "string")
     self.assertEqual(body.indexing, ["index", "summary"])
     self.assertEqual(body.index, "enable-bm25")
     self.assertEqual(body.to_dict, expected_dict)
     self.assertEqual(body, twin)
     self.assertEqual(body, Field.from_dict(body.to_dict))
     self.assertEqual(body.indexing_to_text, "index | summary")
コード例 #4
0
def create_cord19_application_package():
    """Return the ``cord19`` application package with a BERT ranking model.

    The schema gets an ``id`` and a ``title`` field, a ``default`` fieldset
    and a ``bm25`` rank profile before the model ranking is added.
    """
    cord19 = ApplicationPackage(name="cord19")

    id_field = Field(name="id", type="string", indexing=["attribute", "summary"])
    title_field = Field(
        name="title",
        type="string",
        indexing=["index", "summary"],
        index="enable-bm25",
    )
    cord19.schema.add_fields(id_field, title_field)

    title_fieldset = FieldSet(name="default", fields=["title"])
    cord19.schema.add_field_set(title_fieldset)

    bm25_profile = RankProfile(name="bm25", first_phase="bm25(title)")
    cord19.schema.add_rank_profile(bm25_profile)

    tiny_bert = BertModelConfig(
        model_id="pretrained_bert_tiny",
        tokenizer="google/bert_uncased_L-2_H-128_A-2",
        model="google/bert_uncased_L-2_H-128_A-2",
        query_input_size=5,
        doc_input_size=10,
    )
    reranking = SecondPhaseRanking(rerank_count=10, expression="logit1")
    cord19.add_model_ranking(
        model_config=tiny_bert,
        include_model_summary_features=True,
        inherits="default",
        first_phase="bm25(title)",
        second_phase=reranking,
    )
    return cord19
コード例 #5
0
ファイル: test_package.py プロジェクト: vdvorak/vespa
 def test_field_name_type(self):
     """A field with only name and type round-trips and has no indexing text."""
     minimal = Field(name="test_name", type="string")
     expected_dict = {"name": "test_name", "type": "string"}
     # Separate instance so equality is not satisfied by identity.
     twin = Field(name="test_name", type="string")

     self.assertEqual(minimal.name, "test_name")
     self.assertEqual(minimal.type, "string")
     self.assertEqual(minimal.to_dict, expected_dict)
     self.assertEqual(minimal, twin)
     self.assertEqual(minimal, Field.from_dict(minimal.to_dict))
     self.assertIsNone(minimal.indexing_to_text)
コード例 #6
0
def create_qa_application_package():
    """Return a ``QuestionAnswering`` package with an ``id`` field on both schemas."""
    qa_package = QuestionAnswering()
    # Our test suite requires that each schema has an 'id' field.
    for schema_name in ("sentence", "context"):
        qa_package.get_schema(schema_name).add_fields(
            Field(name="id", type="string", indexing=["attribute", "summary"])
        )
    return qa_package
コード例 #7
0
ファイル: test_package.py プロジェクト: ausnews/pyvespa
    def setUp(self) -> None:
        """Assemble ``test_app`` step by step, ending with a BERT model ranking.

        Adds fields, a fieldset, two rank profiles, query profile entries and
        a model ranking to ``self.app_package``. The tokenizer and model are
        located under the ``RESOURCES_DIR`` environment variable.
        """
        self.app_package = ApplicationPackage(name="test_app")

        # Document fields.
        id_field = Field(name="id", type="string", indexing=["attribute", "summary"])
        title_field = Field(
            name="title",
            type="string",
            indexing=["index", "summary"],
            index="enable-bm25",
        )
        body_field = Field(
            name="body",
            type="string",
            indexing=["index", "summary"],
            index="enable-bm25",
        )
        self.app_package.schema.add_fields(id_field, title_field, body_field)

        # Fieldset and rank profiles.
        default_fieldset = FieldSet(name="default", fields=["title", "body"])
        self.app_package.schema.add_field_set(default_fieldset)

        native_rank_profile = RankProfile(
            name="default", first_phase="nativeRank(title, body)"
        )
        self.app_package.schema.add_rank_profile(native_rank_profile)

        bm25_profile = RankProfile(
            name="bm25",
            first_phase="bm25(title) + bm25(body)",
            inherits="default",
        )
        self.app_package.schema.add_rank_profile(bm25_profile)

        # Query profile type and query profile.
        query_bert_type = QueryTypeField(
            name="ranking.features.query(query_bert)",
            type="tensor<float>(x[768])",
        )
        self.app_package.query_profile_type.add_fields(query_bert_type)

        max_hits = QueryField(name="maxHits", value=100)
        another_field = QueryField(name="anotherField", value="string_value")
        self.app_package.query_profile.add_fields(max_hits, another_field)

        # BERT model ranking.
        tiny_bert = BertModelConfig(
            model_id="bert_tiny",
            query_input_size=4,
            doc_input_size=8,
            tokenizer=os.path.join(
                os.environ["RESOURCES_DIR"], "bert_tiny_tokenizer"
            ),
            model=os.path.join(os.environ["RESOURCES_DIR"], "bert_tiny_model"),
        )
        reranking = SecondPhaseRanking(rerank_count=10, expression="logit1")
        self.app_package.add_model_ranking(
            model_config=tiny_bert,
            include_model_summary_features=True,
            inherits="default",
            first_phase="bm25(title)",
            second_phase=reranking,
        )
コード例 #8
0
ファイル: test_package.py プロジェクト: ausnews/pyvespa
 def test_tensor_with_hsnw(self):
     """A tensor field carrying HNSW settings survives a dict round-trip."""
     field = Field(
         name="tensor_field",
         type="tensor<float>(x[128])",
         indexing=["attribute"],
         attribute=["fast-search", "fast-access"],
         ann=HNSW(
             # Was misspelled "enclidean"; the other HNSW configurations in
             # this file use "euclidean".
             distance_metric="euclidean",
             max_links_per_node=16,
             neighbors_to_explore_at_insert=200,
         ),
     )
     self.assertEqual(field, Field.from_dict(field.to_dict))
コード例 #9
0
ファイル: test_package.py プロジェクト: vdvorak/vespa
 def test_document_two_fields(self):
     """Two fields added to an empty document are kept in order and round-trip."""
     doc = Document()
     plain_field = Field(name="test_name", type="string")
     indexed_field = Field(
         name="body",
         type="string",
         indexing=["index", "summary"],
         index="enable-bm25",
     )
     doc.add_fields(plain_field, indexed_field)

     self.assertEqual(doc.fields, [plain_field, indexed_field])
     round_tripped = Document.from_dict(doc.to_dict)
     self.assertEqual(doc, round_tripped)
     built_directly = Document([plain_field, indexed_field])
     self.assertEqual(doc, built_directly)
コード例 #10
0
ファイル: test_package.py プロジェクト: ausnews/pyvespa
    def setUp(self) -> None:
        """Assemble ``test_app`` with text fields and an HNSW tensor field.

        Also adds a fieldset, two rank profiles and query profile entries to
        ``self.app_package``.
        """
        self.app_package = ApplicationPackage(name="test_app")

        # Document fields.
        id_field = Field(name="id", type="string", indexing=["attribute", "summary"])
        title_field = Field(
            name="title",
            type="string",
            indexing=["index", "summary"],
            index="enable-bm25",
        )
        body_field = Field(
            name="body",
            type="string",
            indexing=["index", "summary"],
            index="enable-bm25",
        )
        tensor_field = Field(
            name="tensor_field",
            type="tensor<float>(x[128])",
            indexing=["attribute"],
            attribute=["fast-search", "fast-access"],
            ann=HNSW(
                distance_metric="euclidean",
                max_links_per_node=16,
                neighbors_to_explore_at_insert=200,
            ),
        )
        self.app_package.schema.add_fields(
            id_field, title_field, body_field, tensor_field
        )

        # Fieldset and rank profiles.
        default_fieldset = FieldSet(name="default", fields=["title", "body"])
        self.app_package.schema.add_field_set(default_fieldset)

        native_rank_profile = RankProfile(
            name="default", first_phase="nativeRank(title, body)"
        )
        self.app_package.schema.add_rank_profile(native_rank_profile)

        bm25_profile = RankProfile(
            name="bm25",
            first_phase="bm25(title) + bm25(body)",
            inherits="default",
        )
        self.app_package.schema.add_rank_profile(bm25_profile)

        # Query profile type and query profile.
        query_bert_type = QueryTypeField(
            name="ranking.features.query(query_bert)",
            type="tensor<float>(x[768])",
        )
        self.app_package.query_profile_type.add_fields(query_bert_type)

        max_hits = QueryField(name="maxHits", value=100)
        another_field = QueryField(name="anotherField", value="string_value")
        self.app_package.query_profile.add_fields(max_hits, another_field)
コード例 #11
0
 def setUp(self) -> None:
     """Create the ``msmarco`` application package and pick the disk folder.

     Stores the package on ``self.app_package`` and the path
     ``<WORK_DIR>/sample_application`` on ``self.disk_folder``.

     Raises:
         KeyError: if the ``WORK_DIR`` environment variable is not set.
     """
     #
     # Create application package
     #
     document = Document(fields=[
         Field(name="id", type="string", indexing=["attribute", "summary"]),
         Field(
             name="title",
             type="string",
             indexing=["index", "summary"],
             index="enable-bm25",
         ),
         Field(
             name="body",
             type="string",
             indexing=["index", "summary"],
             index="enable-bm25",
         ),
         Field(
             name="metadata",
             type="string",
             indexing=["attribute", "summary"],
             attribute=["fast-search", "fast-access"],
         ),
         Field(
             name="tensor_field",
             type="tensor<float>(x[128])",
             indexing=["attribute"],
             ann=HNSW(
                 distance_metric="euclidean",
                 max_links_per_node=16,
                 neighbors_to_explore_at_insert=200,
             ),
         ),
     ])
     msmarco_schema = Schema(
         name="msmarco",
         document=document,
         fieldsets=[FieldSet(name="default", fields=["title", "body"])],
         rank_profiles=[
             RankProfile(name="default",
                         first_phase="nativeRank(title, body)")
         ],
     )
     self.app_package = ApplicationPackage(name="msmarco",
                                           schema=msmarco_schema)
     # os.environ[...] raises a KeyError naming the variable when it is
     # unset; os.getenv would hand None to os.path.join (TypeError).
     self.disk_folder = os.path.join(os.environ["WORK_DIR"],
                                     "sample_application")
コード例 #12
0
 def setUp(self) -> None:
     """Create the ``cord19`` package with BERT ranking and deploy it on Vespa Cloud.

     Reads the ``VESPA_CLOUD_USER_KEY`` and ``WORK_DIR`` environment
     variables. The deployed application is stored on ``self.app``.

     Raises:
         KeyError: if either environment variable is not set.
     """
     # Index the environment directly so that a missing variable fails with a
     # KeyError naming it, instead of an AttributeError/TypeError on None.
     user_key = os.environ["VESPA_CLOUD_USER_KEY"]
     work_dir = os.environ["WORK_DIR"]
     #
     # Create application package
     #
     self.app_package = ApplicationPackage(name="cord19")
     self.app_package.schema.add_fields(
         Field(name="cord_uid", type="string", indexing=["attribute", "summary"]),
         Field(
             name="title",
             type="string",
             indexing=["index", "summary"],
             index="enable-bm25",
         ),
     )
     self.app_package.schema.add_field_set(
         FieldSet(name="default", fields=["title"])
     )
     self.app_package.schema.add_rank_profile(
         RankProfile(name="bm25", first_phase="bm25(title)")
     )
     self.bert_config = BertModelConfig(
         model_id="pretrained_bert_tiny",
         tokenizer="google/bert_uncased_L-2_H-128_A-2",
         model="google/bert_uncased_L-2_H-128_A-2",
         query_input_size=5,
         doc_input_size=10,
     )
     self.app_package.add_model_ranking(
         model_config=self.bert_config,
         include_model_summary_features=True,
         inherits="default",
         first_phase="bm25(title)",
         second_phase=SecondPhaseRanking(rerank_count=10, expression="logit1"),
     )
     #
     # Deploy on Vespa Cloud
     #
     self.vespa_cloud = VespaCloud(
         tenant="vespa-team",
         application="pyvespa-integration",
         # Turn literal backslash-n sequences in the key into real newlines.
         key_content=user_key.replace(r"\n", "\n"),
         application_package=self.app_package,
     )
     self.disk_folder = os.path.join(work_dir, "sample_application")
     self.instance_name = "test"
     self.app = self.vespa_cloud.deploy(
         instance=self.instance_name, disk_folder=self.disk_folder
     )
コード例 #13
0
ファイル: test_package.py プロジェクト: vdvorak/vespa
 def test_document_one_field(self):
     """A single field added to an empty document is stored and round-trips."""
     doc = Document()
     only_field = Field(name="test_name", type="string")
     doc.add_fields(only_field)

     self.assertEqual(doc.fields, [only_field])
     round_tripped = Document.from_dict(doc.to_dict)
     self.assertEqual(doc, round_tripped)
     built_directly = Document([only_field])
     self.assertEqual(doc, built_directly)
コード例 #14
0
ファイル: test_package.py プロジェクト: vdvorak/vespa
 def test_schema(self):
     """Schema round-trips through a dict and keys rank profiles by name."""

     def bm25_profile():
         # Fresh instance per call so comparisons never rely on identity.
         return RankProfile(name="bm25", first_phase="bm25(title) + bm25(body)")

     def default_profile():
         return RankProfile(name="default", first_phase="NativeRank(title)")

     schema = Schema(
         name="test_schema",
         document=Document(fields=[Field(name="test_name", type="string")]),
         fieldsets=[FieldSet(name="default", fields=["title", "body"])],
         rank_profiles=[bm25_profile()],
     )
     self.assertEqual(schema, Schema.from_dict(schema.to_dict))
     self.assertDictEqual(schema.rank_profiles, {"bm25": bm25_profile()})

     schema.add_rank_profile(default_profile())
     expected_profiles = {
         "bm25": bm25_profile(),
         "default": default_profile(),
     }
     self.assertDictEqual(schema.rank_profiles, expected_profiles)
コード例 #15
0
 def setUp(self) -> None:
     """Deploy the QA application in Docker and load the sample feed data.

     Reads ``WORK_DIR`` (deployment folder) and ``RESOURCES_DIR`` (sample
     JSON files) from the environment. Stores the deployed app on
     ``self.app`` and prepares the data the tests send and expect back.

     Raises:
         KeyError: if ``WORK_DIR`` or ``RESOURCES_DIR`` is not set.
     """
     self.app_package = create_qa_application_package()
     # NOTE(review): if create_qa_application_package already adds an 'id'
     # field to both schemas, the two calls below are redundant - confirm.
     self.app_package.get_schema("sentence").add_fields(
         Field(name="id", type="string", indexing=["attribute", "summary"]))
     self.app_package.get_schema("context").add_fields(
         Field(name="id", type="string", indexing=["attribute", "summary"]))
     # os.environ[...] (as used for RESOURCES_DIR below) raises a KeyError
     # naming the variable; os.getenv would pass None to os.path.join.
     self.disk_folder = os.path.join(os.environ["WORK_DIR"],
                                     "sample_application")
     self.vespa_docker = VespaDocker(port=8089,
                                     disk_folder=self.disk_folder)
     self.app = self.vespa_docker.deploy(
         application_package=self.app_package)

     resources_dir = os.environ["RESOURCES_DIR"]
     # Explicit encoding: do not depend on the platform default for JSON.
     with open(
             os.path.join(resources_dir, "qa_sample_sentence_data.json"),
             "r",
             encoding="utf-8",
     ) as f:
         sample_sentence_data = json.load(f)
     self.fields_to_send_sentence = sample_sentence_data
     # Expected documents: same fields as sent, but the embedding is given
     # as a list of cells, each with a string index under "x" and its value.
     self.expected_fields_from_sentence_get_operation = [
         {
             "id": d["id"],
             "text": d["text"],
             "dataset": d["dataset"],
             "questions": d["questions"],
             "context_id": d["context_id"],
             "sentence_embedding": {
                 "cells": [
                     {"address": {"x": str(idx)}, "value": value}
                     for idx, value in enumerate(
                         d["sentence_embedding"]["values"])
                 ]
             },
         }
         for d in sample_sentence_data
     ]
     with open(
             os.path.join(resources_dir, "qa_sample_context_data.json"),
             "r",
             encoding="utf-8",
     ) as f:
         sample_context_data = json.load(f)
     self.fields_to_send_context = sample_context_data
     self.fields_to_update = {"text": "this is my updated text"}
コード例 #16
0
def create_msmarco_application_package():
    """Return the ``msmarco`` application package.

    The single schema has text fields, a ``metadata`` attribute and an HNSW
    tensor field, plus a ``default`` fieldset and rank profile.
    """
    id_field = Field(name="id", type="string", indexing=["attribute", "summary"])
    title_field = Field(
        name="title",
        type="string",
        indexing=["index", "summary"],
        index="enable-bm25",
    )
    body_field = Field(
        name="body",
        type="string",
        indexing=["index", "summary"],
        index="enable-bm25",
    )
    metadata_field = Field(
        name="metadata",
        type="string",
        indexing=["attribute", "summary"],
        attribute=["fast-search", "fast-access"],
    )
    hnsw_settings = HNSW(
        distance_metric="euclidean",
        max_links_per_node=16,
        neighbors_to_explore_at_insert=200,
    )
    tensor_field = Field(
        name="tensor_field",
        type="tensor<float>(x[128])",
        indexing=["attribute", "index"],
        ann=hnsw_settings,
    )
    msmarco_document = Document(
        fields=[id_field, title_field, body_field, metadata_field, tensor_field]
    )

    default_fieldset = FieldSet(name="default", fields=["title", "body"])
    default_profile = RankProfile(
        name="default", first_phase="nativeRank(title, body)"
    )
    schema = Schema(
        name="msmarco",
        document=msmarco_document,
        fieldsets=[default_fieldset],
        rank_profiles=[default_profile],
    )
    return ApplicationPackage(name="msmarco", schema=[schema])
コード例 #17
0
 def setUp(self) -> None:
     """Create the ``cord19`` package with BERT ranking and deploy it in Docker.

     Reads the ``WORK_DIR`` environment variable for the deployment folder.
     The deployed application is stored on ``self.app``.

     Raises:
         KeyError: if ``WORK_DIR`` is not set.
     """
     #
     # Create application package
     #
     self.app_package = ApplicationPackage(name="cord19")
     self.app_package.schema.add_fields(
         Field(name="cord_uid",
               type="string",
               indexing=["attribute", "summary"]),
         Field(
             name="title",
             type="string",
             indexing=["index", "summary"],
             index="enable-bm25",
         ),
     )
     self.app_package.schema.add_field_set(
         FieldSet(name="default", fields=["title"]))
     self.app_package.schema.add_rank_profile(
         RankProfile(name="bm25", first_phase="bm25(title)"))
     self.bert_config = BertModelConfig(
         model_id="pretrained_bert_tiny",
         tokenizer="google/bert_uncased_L-2_H-128_A-2",
         model="google/bert_uncased_L-2_H-128_A-2",
         query_input_size=5,
         doc_input_size=10,
     )
     self.app_package.add_model_ranking(
         model_config=self.bert_config,
         include_model_summary_features=True,
         inherits="default",
         first_phase="bm25(title)",
         second_phase=SecondPhaseRanking(rerank_count=10,
                                         expression="logit1"),
     )
     # os.environ[...] raises a KeyError naming the variable when it is
     # unset; os.getenv would hand None to os.path.join (TypeError).
     self.disk_folder = os.path.join(os.environ["WORK_DIR"],
                                     "sample_application")
     self.vespa_docker = VespaDocker(port=8089)
     self.app = self.vespa_docker.deploy(
         application_package=self.app_package, disk_folder=self.disk_folder)
コード例 #18
0
ファイル: test_package.py プロジェクト: ausnews/pyvespa
 def test_schema(self):
     """Schema with an ONNX model round-trips and keys rank profiles by name."""

     def bm25_profile():
         # Fresh instance per call so comparisons never rely on identity.
         return RankProfile(name="bm25",
                            first_phase="bm25(title) + bm25(body)")

     def default_profile():
         return RankProfile(name="default", first_phase="NativeRank(title)")

     bert_model = OnnxModel(
         model_name="bert",
         model_file_path="bert.onnx",
         inputs={
             "input_ids": "input_ids",
             "token_type_ids": "token_type_ids",
             "attention_mask": "attention_mask",
         },
         outputs={"logits": "logits"},
     )
     schema = Schema(
         name="test_schema",
         document=Document(fields=[Field(name="test_name", type="string")]),
         fieldsets=[FieldSet(name="default", fields=["title", "body"])],
         rank_profiles=[bm25_profile()],
         models=[bert_model],
     )
     self.assertEqual(schema, Schema.from_dict(schema.to_dict))
     self.assertDictEqual(schema.rank_profiles, {"bm25": bm25_profile()})

     schema.add_rank_profile(default_profile())
     expected_profiles = {
         "bm25": bm25_profile(),
         "default": default_profile(),
     }
     self.assertDictEqual(schema.rank_profiles, expected_profiles)
コード例 #19
0
ファイル: test_package.py プロジェクト: ausnews/pyvespa
 def setUp(self) -> None:
     """Build ``test_app`` with a BERT rank profile and an ONNX model.

     The ``msmarco`` schema gets four fields, a ``default`` fieldset, three
     rank profiles (``default``, ``bm25``, ``bert``) and one ONNX model. The
     package also receives a query profile and a query profile type, and is
     stored on ``self.app_package``.
     """
     # --- document -----------------------------------------------------
     msmarco_document = Document(fields=[
         Field(name="id", type="string", indexing=["attribute", "summary"]),
         Field(
             name="title",
             type="string",
             indexing=["index", "summary"],
             index="enable-bm25",
         ),
         Field(
             name="body",
             type="string",
             indexing=["index", "summary"],
             index="enable-bm25",
         ),
         Field(
             name="embedding",
             type="tensor<float>(x[128])",
             indexing=["attribute", "summary"],
             attribute=["fast-search", "fast-access"],
         ),
     ])

     # --- ranking expressions used by the "bert" profile ---------------
     input_ids_expression = (
         "tensor<float>(d0[1],d1[128])(\n"
         "    if (d1 == 0,\n"
         "        TOKEN_CLS,\n"
         "    if (d1 < question_length + 1,\n"
         "        query(query_token_ids){d0:(d1-1)},\n"
         "    if (d1 == question_length + 1,\n"
         "        TOKEN_SEP,\n"
         "    if (d1 < question_length + doc_length + 2,\n"
         "        attribute(doc_token_ids){d0:(d1-question_length-2)},\n"
         "    if (d1 == question_length + doc_length + 2,\n"
         "        TOKEN_SEP,\n"
         "        TOKEN_NONE\n"
         "    ))))))"
     )
     token_type_ids_expression = (
         "tensor<float>(d0[1],d1[128])(\n"
         "    if (d1 < question_length,\n"
         "        0,\n"
         "    if (d1 < question_length + doc_length,\n"
         "        1,\n"
         "        TOKEN_NONE\n"
         "    )))"
     )
     bert_functions = [
         Function(
             name="question_length",
             expression="sum(map(query(query_token_ids), f(a)(a > 0)))",
         ),
         Function(
             name="doc_length",
             expression="sum(map(attribute(doc_token_ids), f(a)(a > 0)))",
         ),
         Function(name="input_ids", expression=input_ids_expression),
         Function(
             name="attention_mask",
             expression="map(input_ids, f(a)(a > 0))",
         ),
         Function(name="token_type_ids", expression=token_type_ids_expression),
     ]

     # --- rank profiles ------------------------------------------------
     default_profile = RankProfile(
         name="default", first_phase="nativeRank(title, body)"
     )
     bm25_profile = RankProfile(
         name="bm25",
         first_phase="bm25(title) + bm25(body)",
         inherits="default",
     )
     bert_profile = RankProfile(
         name="bert",
         first_phase="bm25(title) + bm25(body)",
         second_phase=SecondPhaseRanking(
             rerank_count=10,
             expression="sum(onnx(bert).logits{d0:0,d1:0})",
         ),
         inherits="default",
         constants={"TOKEN_NONE": 0, "TOKEN_CLS": 101, "TOKEN_SEP": 102},
         functions=bert_functions,
         summary_features=[
             "onnx(bert).logits",
             "input_ids",
             "attention_mask",
             "token_type_ids",
         ],
     )

     # --- ONNX model ---------------------------------------------------
     bert_model = OnnxModel(
         model_name="bert",
         model_file_path="bert.onnx",
         inputs={
             "input_ids": "input_ids",
             "token_type_ids": "token_type_ids",
             "attention_mask": "attention_mask",
         },
         outputs={"logits": "logits"},
     )

     # --- schema, query profiles and package ---------------------------
     msmarco = Schema(
         name="msmarco",
         document=msmarco_document,
         fieldsets=[FieldSet(name="default", fields=["title", "body"])],
         rank_profiles=[default_profile, bm25_profile, bert_profile],
         models=[bert_model],
     )
     profile_type = QueryProfileType(fields=[
         QueryTypeField(
             name="ranking.features.query(query_bert)",
             type="tensor<float>(x[768])",
         )
     ])
     profile = QueryProfile(fields=[
         QueryField(name="maxHits", value=100),
         QueryField(name="anotherField", value="string_value"),
     ])
     self.app_package = ApplicationPackage(
         name="test_app",
         schema=msmarco,
         query_profile=profile,
         query_profile_type=profile_type,
     )
コード例 #20
0
ファイル: gallery.py プロジェクト: vespa-engine/pyvespa
 def __init__(self, name: str = "qa"):
     """Set up the question-answering application with two schemas.

     The ``context`` schema holds the text fields. The ``sentence`` schema's
     document inherits ``context`` and adds an HNSW sentence embedding.

     :param name: Application name passed to the parent constructor.
     """
     # --- "context" schema ---------------------------------------------
     context_fields = [
         Field(
             name="questions",
             type="array<int>",
             indexing=["summary", "attribute"],
         ),
         Field(name="dataset", type="string", indexing=["summary", "attribute"]),
         Field(name="context_id", type="int", indexing=["summary", "attribute"]),
         Field(
             name="text",
             type="string",
             indexing=["summary", "index"],
             index="enable-bm25",
         ),
     ]
     context_rank_profiles = [
         RankProfile(name="bm25", inherits="default", first_phase="bm25(text)"),
         RankProfile(
             name="nativeRank",
             inherits="default",
             first_phase="nativeRank(text)",
         ),
     ]
     context = Schema(
         name="context",
         document=Document(fields=context_fields),
         fieldsets=[FieldSet(name="default", fields=["text"])],
         rank_profiles=context_rank_profiles,
     )

     # --- "sentence" schema --------------------------------------------
     embedding_field = Field(
         name="sentence_embedding",
         type="tensor<float>(x[512])",
         indexing=["attribute", "index"],
         ann=HNSW(
             distance_metric="euclidean",
             max_links_per_node=16,
             neighbors_to_explore_at_insert=500,
         ),
     )
     sentence_rank_profiles = [
         RankProfile(
             name="semantic-similarity",
             inherits="default",
             first_phase="closeness(sentence_embedding)",
         ),
         RankProfile(name="bm25", inherits="default", first_phase="bm25(text)"),
         RankProfile(
             name="bm25-semantic-similarity",
             inherits="default",
             first_phase="bm25(text) + closeness(sentence_embedding)",
         ),
     ]
     sentence = Schema(
         name="sentence",
         document=Document(inherits="context", fields=[embedding_field]),
         fieldsets=[FieldSet(name="default", fields=["text"])],
         rank_profiles=sentence_rank_profiles,
     )

     # --- query profile type and parent initialisation -----------------
     embedding_query_type = QueryProfileType(
         fields=[
             QueryTypeField(
                 name="ranking.features.query(query_embedding)",
                 type="tensor<float>(x[512])",
             )
         ]
     )
     super().__init__(
         name=name,
         schema=[context, sentence],
         query_profile=QueryProfile(),
         query_profile_type=embedding_query_type,
     )
コード例 #21
0
from vespa.package import (
    ApplicationPackage,
    Document,
    Field,
    FieldSet,
    RankProfile,
    Schema,
    VespaDocker,
)

# Document with an id attribute and two BM25-enabled text fields.
id_field = Field(name="id", type="string", indexing=["attribute", "summary"])
title_field = Field(
    name="title",
    type="string",
    indexing=["index", "summary"],
    index="enable-bm25",
)
body_field = Field(
    name="body",
    type="string",
    indexing=["index", "summary"],
    index="enable-bm25",
)
document = Document(fields=[id_field, title_field, body_field])

# Schema: one default fieldset over title/body and a nativeRank profile.
default_fieldset = FieldSet(name="default", fields=["title", "body"])
default_profile = RankProfile(
    name="default", first_phase="nativeRank(title, body)"
)
msmarco_schema = Schema(
    name="msmarco",
    document=document,
    fieldsets=[default_fieldset],
    rank_profiles=[default_profile],
)

app_package = ApplicationPackage(name="msmarco", schema=msmarco_schema)

# NOTE(review): this path is relative (no leading "/"); if an absolute
# mount path was intended it presumably needs to start with "/mnt" - confirm.
path = "mnt/c/Users/User/OneDrive - NTNU/NTNU/Prosjekt oppgave NLP/"