def to_method_object(self):
    """Convert the enum to an instance of `BaselineMethod`.

    Returns:
        The `BaselineMethod` implementation (keyword-based or
        vector-based) that this enum value names.

    Raises:
        ValueError: if the enum value has no associated method.
    """
    # TF-hub module URIs shared by several branches below.
    hub = "https://tfhub.dev/google/"
    use_uri = hub + "universal-sentence-encoder/2"
    use_large_uri = hub + "universal-sentence-encoder-large/3"
    elmo_uri = hub + "elmo/1"
    bert_small_uri = hub + "bert_uncased_L-12_H-768_A-12/1"
    bert_large_uri = hub + "bert_uncased_L-24_H-1024_A-16/1"

    # Keyword-based methods take no encoder.
    if self == self.TF_IDF:
        return keyword_based.TfIdfMethod()
    if self == self.BM25:
        return keyword_based.BM25Method()

    # Vector-based methods all follow the same construction pattern,
    # so dispatch on (method class, encoder class, module URI) instead
    # of repeating it in ten near-identical elif arms.
    dispatch = {
        self.USE_SIM: (vector_based.VectorSimilarityMethod,
                       vector_based.TfHubEncoder, use_uri),
        self.USE_LARGE_SIM: (vector_based.VectorSimilarityMethod,
                             vector_based.TfHubEncoder, use_large_uri),
        self.ELMO_SIM: (vector_based.VectorSimilarityMethod,
                        vector_based.TfHubEncoder, elmo_uri),
        self.USE_MAP: (vector_based.VectorMappingMethod,
                       vector_based.TfHubEncoder, use_uri),
        self.USE_LARGE_MAP: (vector_based.VectorMappingMethod,
                             vector_based.TfHubEncoder, use_large_uri),
        self.ELMO_MAP: (vector_based.VectorMappingMethod,
                        vector_based.TfHubEncoder, elmo_uri),
        self.BERT_SMALL_SIM: (vector_based.VectorSimilarityMethod,
                              vector_based.BERTEncoder, bert_small_uri),
        self.BERT_SMALL_MAP: (vector_based.VectorMappingMethod,
                              vector_based.BERTEncoder, bert_small_uri),
        self.BERT_LARGE_SIM: (vector_based.VectorSimilarityMethod,
                              vector_based.BERTEncoder, bert_large_uri),
        self.BERT_LARGE_MAP: (vector_based.VectorMappingMethod,
                              vector_based.BERTEncoder, bert_large_uri),
    }
    if self in dispatch:
        method_cls, encoder_cls, uri = dispatch[self]
        return method_cls(encoder=encoder_cls(uri))
    raise ValueError("Unknown method {}".format(self))
def test_encode(self, mock_module_cls):
    """BERTEncoder.encode pools mocked token embeddings per sentence."""

    def mock_module(inputs=None, signature=None, as_dict=None):
        # Stand-in for the TF-hub BERT module. It is called with
        # signature="tokens" when encoding, and with
        # signature="tokenization_info" during encoder construction.
        self.assertTrue(as_dict)
        if signature == "tokens":
            # set(inputs) instead of the Python 2-only dict.viewkeys(),
            # so the assertion works on both Python 2 and 3.
            self.assertEqual({'input_mask', 'input_ids', 'segment_ids'},
                             set(inputs))
            batch_size = tf.shape(inputs['input_ids'])[0]
            seq_len = tf.shape(inputs['input_ids'])[1]
            # Every token position embeds to [1, 1, 1], so pooling over
            # the sequence reduces to counting tokens (see below).
            return {'sequence_output': tf.ones([batch_size, seq_len, 3])}
        self.assertEqual("tokenization_info", signature)
        return {
            'do_lower_case': tf.constant(True),
            'vocab_file': tf.constant(self.vocab_file),
        }

    mock_module_cls.return_value = mock_module
    encoder = vector_based.BERTEncoder("test_uri")
    # The hub module must be instantiated exactly twice with the same
    # URI, both times non-trainable.
    self.assertEqual([(("test_uri", ), {
        'trainable': False
    })] * 2, mock_module_cls.call_args_list)

    # Final encodings will just be the count of the tokens in each
    # sentence, repeated 3 times.
    encodings = encoder.encode(["hello"])
    np.testing.assert_allclose([[3, 3, 3]], encodings)
    encodings = encoder.encode(["hello", "hello hi"])
    np.testing.assert_allclose([[3, 3, 3], [4, 4, 4]], encodings)