def create_cord19_application_package():
    """Build and return the ``cord19`` application package.

    The schema gets an ``id`` attribute field and a BM25-enabled ``title``
    field, a ``default`` field set over ``title`` and a ``bm25`` rank profile.
    A BERT model ranking is then added through ``add_model_ranking``, using
    ``bm25(title)`` as first phase and a second phase that reranks 10 hits
    with the ``logit1`` expression.
    """
    package = ApplicationPackage(name="cord19")

    # Document fields.
    id_field = Field(name="id", type="string", indexing=["attribute", "summary"])
    title_field = Field(
        name="title",
        type="string",
        indexing=["index", "summary"],
        index="enable-bm25",
    )
    package.schema.add_fields(id_field, title_field)

    # Field set and plain BM25 ranking.
    default_field_set = FieldSet(name="default", fields=["title"])
    package.schema.add_field_set(default_field_set)
    bm25_profile = RankProfile(name="bm25", first_phase="bm25(title)")
    package.schema.add_rank_profile(bm25_profile)

    # BERT ranking on top of the BM25 first phase.
    tiny_bert = BertModelConfig(
        model_id="pretrained_bert_tiny",
        tokenizer="google/bert_uncased_L-2_H-128_A-2",
        model="google/bert_uncased_L-2_H-128_A-2",
        query_input_size=5,
        doc_input_size=10,
    )
    rerank_with_logit1 = SecondPhaseRanking(rerank_count=10, expression="logit1")
    package.add_model_ranking(
        model_config=tiny_bert,
        include_model_summary_features=True,
        inherits="default",
        first_phase="bm25(title)",
        second_phase=rerank_with_logit1,
    )
    return package
def setUp(self) -> None:
    """Assemble the ``test_app`` package used by the tests.

    The package gets ``id``/``title``/``body`` fields, a ``default`` field
    set, ``default`` and ``bm25`` rank profiles, one query profile type
    field, two query profile fields and a BERT model ranking whose tokenizer
    and model are read from the directory named by ``RESOURCES_DIR``.
    """

    def bm25_text_field(field_name):
        # Indexed string field with BM25 enabled; a fresh list per field.
        return Field(
            name=field_name,
            type="string",
            indexing=["index", "summary"],
            index="enable-bm25",
        )

    self.app_package = ApplicationPackage(name="test_app")

    # Schema: fields, field set and rank profiles.
    self.app_package.schema.add_fields(
        Field(name="id", type="string", indexing=["attribute", "summary"]),
        bm25_text_field("title"),
        bm25_text_field("body"),
    )
    default_field_set = FieldSet(name="default", fields=["title", "body"])
    self.app_package.schema.add_field_set(default_field_set)
    native_rank_profile = RankProfile(
        name="default", first_phase="nativeRank(title, body)"
    )
    self.app_package.schema.add_rank_profile(native_rank_profile)
    bm25_profile = RankProfile(
        name="bm25",
        first_phase="bm25(title) + bm25(body)",
        inherits="default",
    )
    self.app_package.schema.add_rank_profile(bm25_profile)

    # Query profile type and query profile.
    query_bert_type = QueryTypeField(
        name="ranking.features.query(query_bert)",
        type="tensor<float>(x[768])",
    )
    self.app_package.query_profile_type.add_fields(query_bert_type)
    self.app_package.query_profile.add_fields(
        QueryField(name="maxHits", value=100),
        QueryField(name="anotherField", value="string_value"),
    )

    # BERT ranking backed by local resources.
    resources_dir = os.environ["RESOURCES_DIR"]
    tiny_bert = BertModelConfig(
        model_id="bert_tiny",
        query_input_size=4,
        doc_input_size=8,
        tokenizer=os.path.join(resources_dir, "bert_tiny_tokenizer"),
        model=os.path.join(os.environ["RESOURCES_DIR"], "bert_tiny_model"),
    )
    self.app_package.add_model_ranking(
        model_config=tiny_bert,
        include_model_summary_features=True,
        inherits="default",
        first_phase="bm25(title)",
        second_phase=SecondPhaseRanking(rerank_count=10, expression="logit1"),
    )
def setUp(self) -> None:
    """Create the ``cord19`` package with BERT ranking and deploy it to Vespa Cloud.

    Reads ``VESPA_CLOUD_USER_KEY`` and ``WORK_DIR`` from the environment and
    stores the package, BERT config, cloud handle, disk folder, instance
    name and deployed app on ``self``.
    """
    #
    # Create application package
    #
    self.app_package = ApplicationPackage(name="cord19")
    schema = self.app_package.schema
    uid_field = Field(
        name="cord_uid", type="string", indexing=["attribute", "summary"]
    )
    title_field = Field(
        name="title",
        type="string",
        indexing=["index", "summary"],
        index="enable-bm25",
    )
    schema.add_fields(uid_field, title_field)
    schema.add_field_set(FieldSet(name="default", fields=["title"]))
    schema.add_rank_profile(RankProfile(name="bm25", first_phase="bm25(title)"))

    self.bert_config = BertModelConfig(
        model_id="pretrained_bert_tiny",
        tokenizer="google/bert_uncased_L-2_H-128_A-2",
        model="google/bert_uncased_L-2_H-128_A-2",
        query_input_size=5,
        doc_input_size=10,
    )
    rerank = SecondPhaseRanking(rerank_count=10, expression="logit1")
    self.app_package.add_model_ranking(
        model_config=self.bert_config,
        include_model_summary_features=True,
        inherits="default",
        first_phase="bm25(title)",
        second_phase=rerank,
    )

    #
    # Deploy on Vespa Cloud
    #
    # Literal backslash-n sequences in the key are turned into real newlines.
    user_key = os.getenv("VESPA_CLOUD_USER_KEY").replace(r"\n", "\n")
    self.vespa_cloud = VespaCloud(
        tenant="vespa-team",
        application="pyvespa-integration",
        key_content=user_key,
        application_package=self.app_package,
    )
    work_dir = os.getenv("WORK_DIR")
    self.disk_folder = os.path.join(work_dir, "sample_application")
    self.instance_name = "test"
    self.app = self.vespa_cloud.deploy(
        instance=self.instance_name, disk_folder=self.disk_folder
    )
def setUp(self) -> None:
    """Build a ``test_app`` package around an explicit ``msmarco`` schema.

    The schema has ``id``/``title``/``body`` fields, a ``default`` field set
    and two rank profiles (``default`` and ``bm25`` inheriting from it).
    """
    msmarco_fields = [
        Field(name="id", type="string", indexing=["attribute", "summary"]),
        Field(
            name="title",
            type="string",
            indexing=["index", "summary"],
            index="enable-bm25",
        ),
        Field(
            name="body",
            type="string",
            indexing=["index", "summary"],
            index="enable-bm25",
        ),
    ]
    native_rank_profile = RankProfile(
        name="default", first_phase="nativeRank(title, body)"
    )
    bm25_profile = RankProfile(
        name="bm25",
        first_phase="bm25(title) + bm25(body)",
        inherits="default",
    )
    test_schema = Schema(
        name="msmarco",
        document=Document(fields=msmarco_fields),
        fieldsets=[FieldSet(name="default", fields=["title", "body"])],
        rank_profiles=[native_rank_profile, bm25_profile],
    )
    self.app_package = ApplicationPackage(name="test_app", schema=test_schema)
def setUp(self) -> None:
    """Create the ``msmarco`` package and deploy it to Vespa Cloud.

    The document has text fields, a ``metadata`` attribute and a
    ``tensor_field`` configured with an ``HNSW`` setting. Reads
    ``VESPA_CLOUD_USER_KEY`` and ``WORK_DIR`` from the environment.
    """
    #
    # Create application package
    #
    id_field = Field(name="id", type="string", indexing=["attribute", "summary"])
    title_field = Field(
        name="title",
        type="string",
        indexing=["index", "summary"],
        index="enable-bm25",
    )
    body_field = Field(
        name="body",
        type="string",
        indexing=["index", "summary"],
        index="enable-bm25",
    )
    metadata_field = Field(
        name="metadata",
        type="string",
        indexing=["attribute", "summary"],
        attribute=["fast-search", "fast-access"],
    )
    hnsw_settings = HNSW(
        distance_metric="euclidean",
        max_links_per_node=16,
        neighbors_to_explore_at_insert=200,
    )
    tensor_field = Field(
        name="tensor_field",
        type="tensor<float>(x[128])",
        indexing=["attribute"],
        ann=hnsw_settings,
    )
    document = Document(
        fields=[id_field, title_field, body_field, metadata_field, tensor_field]
    )
    msmarco_schema = Schema(
        name="msmarco",
        document=document,
        fieldsets=[FieldSet(name="default", fields=["title", "body"])],
        rank_profiles=[
            RankProfile(name="default", first_phase="nativeRank(title, body)")
        ],
    )
    app_package = ApplicationPackage(name="msmarco", schema=msmarco_schema)

    #
    # Deploy on Vespa Cloud
    #
    # Literal backslash-n sequences in the key are turned into real newlines.
    user_key = os.getenv("VESPA_CLOUD_USER_KEY").replace(r"\n", "\n")
    self.vespa_cloud = VespaCloud(
        tenant="vespa-team",
        application="pyvespa-integration",
        key_content=user_key,
        application_package=app_package,
    )
    work_dir = os.getenv("WORK_DIR")
    self.disk_folder = os.path.join(work_dir, "sample_application")
    self.instance_name = "test"
    self.app = self.vespa_cloud.deploy(
        instance=self.instance_name, disk_folder=self.disk_folder
    )
def setUp(self) -> None:
    """Create the ``cord19`` package with BERT ranking and deploy it via ``VespaDocker``.

    Reads ``WORK_DIR`` from the environment for the deployment folder and
    stores the package, BERT config, docker handle and deployed app on
    ``self``.
    """
    #
    # Create application package
    #
    self.app_package = ApplicationPackage(name="cord19")
    schema = self.app_package.schema
    uid_field = Field(
        name="cord_uid", type="string", indexing=["attribute", "summary"]
    )
    title_field = Field(
        name="title",
        type="string",
        indexing=["index", "summary"],
        index="enable-bm25",
    )
    schema.add_fields(uid_field, title_field)
    schema.add_field_set(FieldSet(name="default", fields=["title"]))
    schema.add_rank_profile(RankProfile(name="bm25", first_phase="bm25(title)"))

    self.bert_config = BertModelConfig(
        model_id="pretrained_bert_tiny",
        tokenizer="google/bert_uncased_L-2_H-128_A-2",
        model="google/bert_uncased_L-2_H-128_A-2",
        query_input_size=5,
        doc_input_size=10,
    )
    rerank = SecondPhaseRanking(rerank_count=10, expression="logit1")
    self.app_package.add_model_ranking(
        model_config=self.bert_config,
        include_model_summary_features=True,
        inherits="default",
        first_phase="bm25(title)",
        second_phase=rerank,
    )

    # Deploy to a local container listening on port 8089.
    work_dir = os.getenv("WORK_DIR")
    self.disk_folder = os.path.join(work_dir, "sample_application")
    self.vespa_docker = VespaDocker(port=8089)
    self.app = self.vespa_docker.deploy(
        application_package=self.app_package,
        disk_folder=self.disk_folder,
    )
def setUp(self) -> None:
    """Assemble the ``test_app`` package with text fields and a tensor field.

    Adds ``id``/``title``/``body`` plus a ``tensor_field`` with an ``HNSW``
    setting, a ``default`` field set, ``default`` and ``bm25`` rank
    profiles, one query profile type field and two query profile fields.
    """
    self.app_package = ApplicationPackage(name="test_app")

    # Document fields.
    id_field = Field(name="id", type="string", indexing=["attribute", "summary"])
    title_field = Field(
        name="title",
        type="string",
        indexing=["index", "summary"],
        index="enable-bm25",
    )
    body_field = Field(
        name="body",
        type="string",
        indexing=["index", "summary"],
        index="enable-bm25",
    )
    hnsw_settings = HNSW(
        distance_metric="euclidean",
        max_links_per_node=16,
        neighbors_to_explore_at_insert=200,
    )
    tensor_field = Field(
        name="tensor_field",
        type="tensor<float>(x[128])",
        indexing=["attribute"],
        attribute=["fast-search", "fast-access"],
        ann=hnsw_settings,
    )
    self.app_package.schema.add_fields(
        id_field, title_field, body_field, tensor_field
    )

    # Field set and rank profiles.
    default_field_set = FieldSet(name="default", fields=["title", "body"])
    self.app_package.schema.add_field_set(default_field_set)
    native_rank_profile = RankProfile(
        name="default", first_phase="nativeRank(title, body)"
    )
    self.app_package.schema.add_rank_profile(native_rank_profile)
    bm25_profile = RankProfile(
        name="bm25",
        first_phase="bm25(title) + bm25(body)",
        inherits="default",
    )
    self.app_package.schema.add_rank_profile(bm25_profile)

    # Query profile type and query profile.
    query_bert_type = QueryTypeField(
        name="ranking.features.query(query_bert)",
        type="tensor<float>(x[768])",
    )
    self.app_package.query_profile_type.add_fields(query_bert_type)
    max_hits = QueryField(name="maxHits", value=100)
    another_field = QueryField(name="anotherField", value="string_value")
    self.app_package.query_profile.add_fields(max_hits, another_field)
def setUp(self) -> None:
    """Create the ``msmarco`` application package and the deployment folder path.

    Stores the package on ``self.app_package`` and the folder (under the
    directory named by ``WORK_DIR``) on ``self.disk_folder``.
    """
    #
    # Create application package
    #
    id_field = Field(name="id", type="string", indexing=["attribute", "summary"])
    title_field = Field(
        name="title",
        type="string",
        indexing=["index", "summary"],
        index="enable-bm25",
    )
    body_field = Field(
        name="body",
        type="string",
        indexing=["index", "summary"],
        index="enable-bm25",
    )
    metadata_field = Field(
        name="metadata",
        type="string",
        indexing=["attribute", "summary"],
        attribute=["fast-search", "fast-access"],
    )
    hnsw_settings = HNSW(
        distance_metric="euclidean",
        max_links_per_node=16,
        neighbors_to_explore_at_insert=200,
    )
    tensor_field = Field(
        name="tensor_field",
        type="tensor<float>(x[128])",
        indexing=["attribute"],
        ann=hnsw_settings,
    )
    document = Document(
        fields=[id_field, title_field, body_field, metadata_field, tensor_field]
    )
    native_rank_profile = RankProfile(
        name="default", first_phase="nativeRank(title, body)"
    )
    msmarco_schema = Schema(
        name="msmarco",
        document=document,
        fieldsets=[FieldSet(name="default", fields=["title", "body"])],
        rank_profiles=[native_rank_profile],
    )
    self.app_package = ApplicationPackage(name="msmarco", schema=msmarco_schema)

    work_dir = os.getenv("WORK_DIR")
    self.disk_folder = os.path.join(work_dir, "sample_application")
def create_msmarco_application_package():
    """Build and return the ``msmarco`` application package.

    The document has ``id``, ``title``, ``body`` and ``metadata`` fields and
    a ``tensor_field`` indexed as ``attribute`` and ``index`` with an
    ``HNSW`` setting. The schema is handed to ``ApplicationPackage`` inside a
    one-element list.
    """
    #
    # Application package
    #
    hnsw_settings = HNSW(
        distance_metric="euclidean",
        max_links_per_node=16,
        neighbors_to_explore_at_insert=200,
    )
    document_fields = [
        Field(name="id", type="string", indexing=["attribute", "summary"]),
        Field(
            name="title",
            type="string",
            indexing=["index", "summary"],
            index="enable-bm25",
        ),
        Field(
            name="body",
            type="string",
            indexing=["index", "summary"],
            index="enable-bm25",
        ),
        Field(
            name="metadata",
            type="string",
            indexing=["attribute", "summary"],
            attribute=["fast-search", "fast-access"],
        ),
        Field(
            name="tensor_field",
            type="tensor<float>(x[128])",
            indexing=["attribute", "index"],
            ann=hnsw_settings,
        ),
    ]
    native_rank_profile = RankProfile(
        name="default", first_phase="nativeRank(title, body)"
    )
    msmarco_schema = Schema(
        name="msmarco",
        document=Document(fields=document_fields),
        fieldsets=[FieldSet(name="default", fields=["title", "body"])],
        rank_profiles=[native_rank_profile],
    )
    return ApplicationPackage(name="msmarco", schema=[msmarco_schema])
def test_application_package(self):
    """Check that the package equals its ``to_dict`` / ``from_dict`` round trip."""
    original = self.app_package
    # ``to_dict`` is read as an attribute here, not called.
    rebuilt = ApplicationPackage.from_dict(self.app_package.to_dict)
    self.assertEqual(original, rebuilt)
indexing=["index", "summary"], index="enable-bm25"), Field(name="body", type="string", indexing=["index", "summary"], index="enable-bm25") ]) from vespa.package import Schema, FieldSet, RankProfile msmarco_schema = Schema( name="msmarco", document=document, fieldsets=[FieldSet(name="default", fields=["title", "body"])], rank_profiles=[ RankProfile(name="default", first_phase="nativeRank(title, body)") ]) from vespa.package import ApplicationPackage app_package = ApplicationPackage(name="msmarco", schema=msmarco_schema) from vespa.package import VespaDocker path = "mnt/c/Users/User/OneDrive - NTNU/NTNU/Prosjekt oppgave NLP/" name = "virke_denne_gangen/" app_path = path + name vespa_docker = VespaDocker() vespa_docker.deploy(application_package=app_package, disk_folder=app_path)
class TestSimplifiedApplicationPackageAddBertRanking(unittest.TestCase):
    """Tests for a ``test_app`` package that has a BERT model ranking added.

    ``setUp`` builds the package; each test compares one of the package's
    text renderings (or its dict round trip) against an expected value.
    """

    def setUp(self) -> None:
        """Build the ``test_app`` package, including the ``bert_tiny`` ranking."""
        self.app_package = ApplicationPackage(name="test_app")
        # Schema: three document fields, one field set, two rank profiles.
        self.app_package.schema.add_fields(
            Field(name="id", type="string", indexing=["attribute", "summary"]),
            Field(
                name="title",
                type="string",
                indexing=["index", "summary"],
                index="enable-bm25",
            ),
            Field(
                name="body",
                type="string",
                indexing=["index", "summary"],
                index="enable-bm25",
            ),
        )
        self.app_package.schema.add_field_set(
            FieldSet(name="default", fields=["title", "body"]))
        self.app_package.schema.add_rank_profile(
            RankProfile(name="default", first_phase="nativeRank(title, body)"))
        self.app_package.schema.add_rank_profile(
            RankProfile(
                name="bm25",
                first_phase="bm25(title) + bm25(body)",
                inherits="default",
            ))
        # Query profile type and query profile entries.
        self.app_package.query_profile_type.add_fields(
            QueryTypeField(
                name="ranking.features.query(query_bert)",
                type="tensor<float>(x[768])",
            ))
        self.app_package.query_profile.add_fields(
            QueryField(name="maxHits", value=100),
            QueryField(name="anotherField", value="string_value"),
        )
        # Tokenizer and model are read from the directory named by RESOURCES_DIR.
        bert_config = BertModelConfig(
            model_id="bert_tiny",
            query_input_size=4,
            doc_input_size=8,
            tokenizer=os.path.join(os.environ["RESOURCES_DIR"], "bert_tiny_tokenizer"),
            model=os.path.join(os.environ["RESOURCES_DIR"], "bert_tiny_model"),
        )
        self.app_package.add_model_ranking(
            model_config=bert_config,
            include_model_summary_features=True,
            inherits="default",
            first_phase="bm25(title)",
            second_phase=SecondPhaseRanking(rerank_count=10, expression="logit1"),
        )

    def test_application_package(self):
        """The package must equal its ``to_dict`` / ``from_dict`` round trip."""
        self.assertEqual(
            self.app_package, ApplicationPackage.from_dict(self.app_package.to_dict))

    def test_schema_to_text(self):
        """``schema_to_text`` must match the expected schema definition text."""
        # NOTE(review): the leading whitespace inside these literals looks
        # collapsed to a single space - confirm against the real
        # ``schema_to_text`` output before relying on this expectation.
        expected_result = (
            "schema test_app {\n"
            " document test_app {\n"
            " field id type string {\n"
            " indexing: attribute | summary\n"
            " }\n"
            " field title type string {\n"
            " indexing: index | summary\n"
            " index: enable-bm25\n"
            " }\n"
            " field body type string {\n"
            " indexing: index | summary\n"
            " index: enable-bm25\n"
            " }\n"
            " field bert_tiny_doc_token_ids type tensor<float>(d0[7]) {\n"
            " indexing: attribute | summary\n"
            " }\n"
            " }\n"
            " fieldset default {\n"
            " fields: title, body\n"
            " }\n"
            " onnx-model bert_tiny {\n"
            " file: files/bert_tiny.onnx\n"
            " input input_ids: input_ids\n"
            " input token_type_ids: token_type_ids\n"
            " input attention_mask: attention_mask\n"
            " output output_0: logits\n"
            " }\n"
            " rank-profile default {\n"
            " first-phase {\n"
            " expression: nativeRank(title, body)\n"
            " }\n"
            " }\n"
            " rank-profile bm25 inherits default {\n"
            " first-phase {\n"
            " expression: bm25(title) + bm25(body)\n"
            " }\n"
            " }\n"
            " rank-profile bert_tiny inherits default {\n"
            " constants {\n"
            " TOKEN_NONE: 0\n"
            " TOKEN_CLS: 101\n"
            " TOKEN_SEP: 102\n"
            " }\n"
            " function question_length() {\n"
            " expression {\n"
            " sum(map(query(bert_tiny_query_token_ids), f(a)(a > 0)))\n"
            " }\n"
            " }\n"
            " function doc_length() {\n"
            " expression {\n"
            " sum(map(attribute(bert_tiny_doc_token_ids), f(a)(a > 0)))\n"
            " }\n"
            " }\n"
            " function input_ids() {\n"
            " expression {\n"
            " tokenInputIds(12, query(bert_tiny_query_token_ids), attribute(bert_tiny_doc_token_ids))\n"
            " }\n"
            " }\n"
            " function attention_mask() {\n"
            " expression {\n"
            " tokenAttentionMask(12, query(bert_tiny_query_token_ids), attribute(bert_tiny_doc_token_ids))\n"
            " }\n"
            " }\n"
            " function token_type_ids() {\n"
            " expression {\n"
            " tokenTypeIds(12, query(bert_tiny_query_token_ids), attribute(bert_tiny_doc_token_ids))\n"
            " }\n"
            " }\n"
            " function logit0() {\n"
            " expression {\n"
            " onnx(bert_tiny).logits{d0:0,d1:0}\n"
            " }\n"
            " }\n"
            " function logit1() {\n"
            " expression {\n"
            " onnx(bert_tiny).logits{d0:0,d1:1}\n"
            " }\n"
            " }\n"
            " first-phase {\n"
            " expression: bm25(title)\n"
            " }\n"
            " second-phase {\n"
            " rerank-count: 10\n"
            " expression: logit1\n"
            " }\n"
            " summary-features {\n"
            " logit0\n"
            " logit1\n"
            " input_ids\n"
            " attention_mask\n"
            " token_type_ids\n"
            " }\n"
            " }\n"
            "}")
        self.assertEqual(self.app_package.schema_to_text, expected_result)

    def test_hosts_to_text(self):
        """``hosts_to_text`` must match the expected hosts XML."""
        expected_result = (
            '<?xml version="1.0" encoding="utf-8" ?>\n'
            "<!-- Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -->\n"
            "<hosts>\n"
            ' <host name="localhost">\n'
            " <alias>node1</alias>\n"
            " </host>\n"
            "</hosts>")
        self.assertEqual(self.app_package.hosts_to_text, expected_result)

    def test_services_to_text(self):
        """``services_to_text`` must match the expected services XML."""
        expected_result = (
            '<?xml version="1.0" encoding="UTF-8"?>\n'
            '<services version="1.0">\n'
            ' <container id="test_app_container" version="1.0">\n'
            " <search></search>\n"
            " <document-api></document-api>\n"
            " </container>\n"
            ' <content id="test_app_content" version="1.0">\n'
            ' <redundancy reply-after="1">1</redundancy>\n'
            " <documents>\n"
            ' <document type="test_app" mode="index"></document>\n'
            " </documents>\n"
            " <nodes>\n"
            ' <node distribution-key="0" hostalias="node1"></node>\n'
            " </nodes>\n"
            " </content>\n"
            "</services>")
        self.assertEqual(self.app_package.services_to_text, expected_result)

    def test_query_profile_to_text(self):
        """``query_profile_to_text`` must list the two fields added in ``setUp``."""
        expected_result = (
            '<query-profile id="default" type="root">\n'
            ' <field name="maxHits">100</field>\n'
            ' <field name="anotherField">string_value</field>\n'
            "</query-profile>")
        self.assertEqual(self.app_package.query_profile_to_text, expected_result)

    def test_query_profile_type_to_text(self):
        """``query_profile_type_to_text`` must list ``query_bert`` and the BERT query token ids field."""
        expected_result = (
            '<query-profile-type id="root">\n'
            ' <field name="ranking.features.query(query_bert)" type="tensor<float>(x[768])" />\n'
            ' <field name="ranking.features.query(bert_tiny_query_token_ids)" type="tensor<float>(d0[2])" />\n'
            "</query-profile-type>")
        self.assertEqual(self.app_package.query_profile_type_to_text,
                         expected_result)
def setUp(self) -> None:
    """Build a ``test_app`` package whose schema spells out a BERT rank profile.

    The ``msmarco`` schema carries four fields, a ``default`` field set,
    three rank profiles (``default``, ``bm25`` and a hand-written ``bert``
    profile with constants, functions and summary features) and one
    ``OnnxModel``. A query profile type and a query profile are attached to
    the package as well.
    """
    # Document fields.
    schema_fields = [
        Field(name="id", type="string", indexing=["attribute", "summary"]),
        Field(
            name="title",
            type="string",
            indexing=["index", "summary"],
            index="enable-bm25",
        ),
        Field(
            name="body",
            type="string",
            indexing=["index", "summary"],
            index="enable-bm25",
        ),
        Field(
            name="embedding",
            type="tensor<float>(x[128])",
            indexing=["attribute", "summary"],
            attribute=["fast-search", "fast-access"],
        ),
    ]

    # Ranking expressions used by the functions of the ``bert`` profile.
    input_ids_expression = (
        "tensor<float>(d0[1],d1[128])(\n"
        " if (d1 == 0,\n"
        " TOKEN_CLS,\n"
        " if (d1 < question_length + 1,\n"
        " query(query_token_ids){d0:(d1-1)},\n"
        " if (d1 == question_length + 1,\n"
        " TOKEN_SEP,\n"
        " if (d1 < question_length + doc_length + 2,\n"
        " attribute(doc_token_ids){d0:(d1-question_length-2)},\n"
        " if (d1 == question_length + doc_length + 2,\n"
        " TOKEN_SEP,\n"
        " TOKEN_NONE\n"
        " ))))))"
    )
    token_type_ids_expression = (
        "tensor<float>(d0[1],d1[128])(\n"
        " if (d1 < question_length,\n"
        " 0,\n"
        " if (d1 < question_length + doc_length,\n"
        " 1,\n"
        " TOKEN_NONE\n"
        " )))"
    )
    bert_functions = [
        Function(
            name="question_length",
            expression="sum(map(query(query_token_ids), f(a)(a > 0)))",
        ),
        Function(
            name="doc_length",
            expression="sum(map(attribute(doc_token_ids), f(a)(a > 0)))",
        ),
        Function(name="input_ids", expression=input_ids_expression),
        Function(
            name="attention_mask",
            expression="map(input_ids, f(a)(a > 0))",
        ),
        Function(name="token_type_ids", expression=token_type_ids_expression),
    ]

    # Rank profiles.
    native_rank_profile = RankProfile(
        name="default", first_phase="nativeRank(title, body)"
    )
    bm25_profile = RankProfile(
        name="bm25",
        first_phase="bm25(title) + bm25(body)",
        inherits="default",
    )
    bert_profile = RankProfile(
        name="bert",
        first_phase="bm25(title) + bm25(body)",
        second_phase=SecondPhaseRanking(
            rerank_count=10,
            expression="sum(onnx(bert).logits{d0:0,d1:0})",
        ),
        inherits="default",
        constants={"TOKEN_NONE": 0, "TOKEN_CLS": 101, "TOKEN_SEP": 102},
        functions=bert_functions,
        summary_features=[
            "onnx(bert).logits",
            "input_ids",
            "attention_mask",
            "token_type_ids",
        ],
    )

    # ONNX model wired to the functions above.
    bert_model = OnnxModel(
        model_name="bert",
        model_file_path="bert.onnx",
        inputs={
            "input_ids": "input_ids",
            "token_type_ids": "token_type_ids",
            "attention_mask": "attention_mask",
        },
        outputs={"logits": "logits"},
    )

    test_schema = Schema(
        name="msmarco",
        document=Document(fields=schema_fields),
        fieldsets=[FieldSet(name="default", fields=["title", "body"])],
        rank_profiles=[native_rank_profile, bm25_profile, bert_profile],
        models=[bert_model],
    )

    # Query profile type and query profile.
    test_query_profile_type = QueryProfileType(
        fields=[
            QueryTypeField(
                name="ranking.features.query(query_bert)",
                type="tensor<float>(x[768])",
            )
        ]
    )
    test_query_profile = QueryProfile(
        fields=[
            QueryField(name="maxHits", value=100),
            QueryField(name="anotherField", value="string_value"),
        ]
    )

    self.app_package = ApplicationPackage(
        name="test_app",
        schema=test_schema,
        query_profile=test_query_profile,
        query_profile_type=test_query_profile_type,
    )
class TestOnnxModelDockerDeployment(unittest.TestCase):
    """Integration tests for a BERT-ranked ``cord19`` app deployed via ``VespaDocker``.

    ``setUp`` builds and deploys the application; ``tearDown`` removes the
    deployment folder and stops and removes the container.
    """

    def setUp(self) -> None:
        """Create the ``cord19`` application package and deploy it on port 8089."""
        #
        # Create application package
        #
        self.app_package = ApplicationPackage(name="cord19")
        self.app_package.schema.add_fields(
            Field(name="cord_uid", type="string", indexing=["attribute", "summary"]),
            Field(
                name="title",
                type="string",
                indexing=["index", "summary"],
                index="enable-bm25",
            ),
        )
        self.app_package.schema.add_field_set(
            FieldSet(name="default", fields=["title"]))
        self.app_package.schema.add_rank_profile(
            RankProfile(name="bm25", first_phase="bm25(title)"))
        self.bert_config = BertModelConfig(
            model_id="pretrained_bert_tiny",
            tokenizer="google/bert_uncased_L-2_H-128_A-2",
            model="google/bert_uncased_L-2_H-128_A-2",
            query_input_size=5,
            doc_input_size=10,
        )
        self.app_package.add_model_ranking(
            model_config=self.bert_config,
            include_model_summary_features=True,
            inherits="default",
            first_phase="bm25(title)",
            second_phase=SecondPhaseRanking(rerank_count=10, expression="logit1"),
        )
        self.disk_folder = os.path.join(os.getenv("WORK_DIR"), "sample_application")
        self.vespa_docker = VespaDocker(port=8089)
        self.app = self.vespa_docker.deploy(
            application_package=self.app_package, disk_folder=self.disk_folder)

    def _feed_first_document(self):
        """Feed document ``1`` with its title and BERT doc fields.

        Asserts that the feed response carries the expected document id and
        returns the ``fields`` dict that was sent.
        """
        fields = {
            "cord_uid": "1",
            "title": "this is my first title",
        }
        fields.update(self.bert_config.doc_fields(text=str(fields["title"])))
        response = self.app.feed_data_point(
            schema="cord19",
            data_id="1",
            fields=fields,
        )
        self.assertEqual(response.json()["id"], "id:cord19:cord19::1")
        return fields

    @staticmethod
    def _expected_document(title, token_values):
        """Return the JSON body expected from ``get_data`` for document ``1``.

        ``token_values`` is the ``values`` list that was fed for the
        ``pretrained_bert_tiny_doc_token_ids`` field; each entry becomes one
        cell addressed by its position (as a string) with a float value.
        """
        return {
            "fields": {
                "cord_uid": "1",
                "title": title,
                "pretrained_bert_tiny_doc_token_ids": {
                    "cells": [{
                        "address": {
                            "d0": str(position)
                        },
                        "value": float(value),
                    } for position, value in enumerate(token_values)]
                },
            },
            "id": "id:cord19:cord19::1",
            "pathId": "/document/v1/cord19/cord19/docid/1",
        }

    def test_deploy(self):
        """The deployment message reports a generation and the app status is 200."""
        self.assertTrue(
            any(
                re.match("Generation: [0-9]+", line)
                for line in self.app.deployment_message))
        self.assertEqual(self.app.get_application_status().status_code, 200)

    def test_data_operation(self):
        """Exercise get / feed / update / delete for a single document."""
        #
        # Get data that does not exist
        #
        self.assertEqual(
            self.app.get_data(schema="cord19", data_id="1").status_code, 404)
        #
        # Feed a data point
        #
        fields = self._feed_first_document()
        #
        # Get data that exist
        #
        response = self.app.get_data(schema="cord19", data_id="1")
        self.assertEqual(response.status_code, 200)
        embedding_values = fields["pretrained_bert_tiny_doc_token_ids"]["values"]
        self.assertDictEqual(
            response.json(),
            self._expected_document("this is my first title", embedding_values),
        )
        #
        # Update data
        #
        fields = {"title": "this is my updated title"}
        fields.update(self.bert_config.doc_fields(text=str(fields["title"])))
        response = self.app.update_data(schema="cord19",
                                        data_id="1",
                                        fields=fields)
        self.assertEqual(response.json()["id"], "id:cord19:cord19::1")
        #
        # Get the updated data point
        #
        response = self.app.get_data(schema="cord19", data_id="1")
        self.assertEqual(response.status_code, 200)
        embedding_values = fields["pretrained_bert_tiny_doc_token_ids"]["values"]
        self.assertDictEqual(
            response.json(),
            self._expected_document("this is my updated title", embedding_values),
        )
        #
        # Delete a data point
        #
        response = self.app.delete_data(schema="cord19", data_id="1")
        self.assertEqual(response.json()["id"], "id:cord19:cord19::1")
        #
        # Deleted data should be gone
        #
        self.assertEqual(
            self.app.get_data(schema="cord19", data_id="1").status_code, 404)

    def _parse_vespa_tensor(self, hit, feature):
        """Return the cell values of summary feature ``feature`` in ``hit``, in cell order."""
        return [
            x["value"]
            for x in hit["fields"]["summaryfeatures"][feature]["cells"]
        ]

    def test_rank_input_output(self):
        """Compare the model inputs and logits reported in the hit with ``bert_config``'s own."""
        #
        # Feed a data point
        #
        self._feed_first_document()
        #
        # Run a test query
        #
        result = self.app.query(
            query="this is a test",
            query_model=QueryModel(
                query_properties=[
                    QueryRankingFeature(
                        name=self.bert_config.query_token_ids_name,
                        mapping=self.bert_config.query_tensor_mapping,
                    )
                ],
                match_phase=OR(),
                rank_profile=Ranking(name="pretrained_bert_tiny"),
            ),
        )
        vespa_input_ids = self._parse_vespa_tensor(
            result.hits[0], "rankingExpression(input_ids)")
        vespa_attention_mask = self._parse_vespa_tensor(
            result.hits[0], "rankingExpression(attention_mask)")
        vespa_token_type_ids = self._parse_vespa_tensor(
            result.hits[0], "rankingExpression(token_type_ids)")

        expected_inputs = self.bert_config.create_encodings(
            queries=["this is a test"], docs=["this is my first title"])
        self.assertEqual(vespa_input_ids, expected_inputs["input_ids"][0])
        self.assertEqual(vespa_attention_mask,
                         expected_inputs["attention_mask"][0])
        self.assertEqual(vespa_token_type_ids,
                         expected_inputs["token_type_ids"][0])

        expected_logits = self.bert_config.predict(
            queries=["this is a test"], docs=["this is my first title"])
        # Logits are compared to 5 decimal places.
        self.assertAlmostEqual(
            result.hits[0]["fields"]["summaryfeatures"]
            ["rankingExpression(logit0)"],
            expected_logits[0][0],
            5,
        )
        self.assertAlmostEqual(
            result.hits[0]["fields"]["summaryfeatures"]
            ["rankingExpression(logit1)"],
            expected_logits[0][1],
            5,
        )

    def tearDown(self) -> None:
        """Remove the deployment folder, then stop and remove the container."""
        shutil.rmtree(self.disk_folder, ignore_errors=True)
        self.vespa_docker.container.stop()
        self.vespa_docker.container.remove()