def test_default_low_maxunicode(self):
    import sys
    import mock
    from google.cloud.language.document import Encoding

    with mock.patch.dict(sys.__dict__, maxunicode=65535):
        self.assertEqual(Encoding.get_default(), Encoding.UTF16)

    with mock.patch.dict(sys.__dict__, maxunicode=1114111):
        self.assertEqual(Encoding.get_default(), Encoding.UTF32)
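
# For reference, a minimal sketch of the logic the test above pins down.
# The real implementation is Encoding.get_default() in
# google.cloud.language.document; this standalone helper is hypothetical,
# for illustration only, and just mirrors the sys.maxunicode check the
# assertions exercise.
def _default_encoding_sketch():
    import sys
    from google.cloud.language.document import Encoding

    # Narrow Unicode builds (sys.maxunicode == 65535) measure text offsets
    # in UTF-16 code units; wide builds (sys.maxunicode == 1114111) can
    # safely report UTF-32 offsets.
    if sys.maxunicode == 65535:
        return Encoding.UTF16
    return Encoding.UTF32
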
def _annotate_text_helper(self, include_sentiment,
                          include_entities, include_syntax):
    from google.cloud.language.document import Annotations
    from google.cloud.language.document import Encoding
    from google.cloud.language.entity import EntityType

    token_info, sentences = _get_token_and_sentences(include_syntax)
    entities = _get_entities(include_entities)
    tokens = [_make_token_json(*info) for info in token_info]
    response = {
        'sentences': sentences,
        'tokens': tokens,
        'entities': entities,
        'language': 'en-US',
    }
    if include_sentiment:
        response['documentSentiment'] = {
            'score': ANNOTATE_SCORE,
            'magnitude': ANNOTATE_MAGNITUDE,
        }

    client = make_mock_client(response)
    document = self._make_one(client, ANNOTATE_CONTENT)

    annotations = document.annotate_text(
        include_syntax=include_syntax,
        include_entities=include_entities,
        include_sentiment=include_sentiment)
    self.assertIsInstance(annotations, Annotations)
    # Sentences
    self._verify_sentences(include_syntax, annotations)
    # Token
    self._verify_tokens(annotations, token_info)
    # Sentiment
    if include_sentiment:
        self._verify_sentiment(annotations.sentiment,
                               ANNOTATE_SCORE, ANNOTATE_MAGNITUDE)
    else:
        self.assertIsNone(annotations.sentiment)
    # Entity
    if include_entities:
        self.assertEqual(len(annotations.entities), 1)
        entity = annotations.entities[0]
        self._verify_entity(entity, ANNOTATE_NAME, EntityType.LOCATION,
                            ANNOTATE_WIKI_URL, ANNOTATE_SALIENCE)
    else:
        self.assertEqual(annotations.entities, [])

    # Verify the request.
    expected = self._expected_data(
        ANNOTATE_CONTENT, encoding_type=Encoding.get_default(),
        extract_sentiment=include_sentiment,
        extract_entities=include_entities,
        extract_syntax=include_syntax)
    client._connection.api_request.assert_called_once_with(
        path='annotateText', method='POST', data=expected)
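
# These tests lean on a module-level make_mock_client helper defined
# elsewhere in this file. A minimal sketch of the shape such a helper can
# take (assumed, not the module's actual definition): it stubs the
# client's connection so the canned JSON response is returned and the
# outgoing request can be asserted on. The api_version handling is an
# assumption based on the v1beta2 call in test_analyze_entity_sentiment.
def _make_mock_client_sketch(response, api_version='v1'):
    import mock

    # Restrict the connection mock to api_request so typos in the code
    # under test fail loudly instead of silently returning new mocks.
    connection = mock.Mock(spec=['api_request'])
    connection.api_request.return_value = response
    client = mock.Mock(spec=['_connection'])
    client._connection = connection
    return client
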
def test_analyze_syntax(self):
    from google.cloud.language.api_responses import SyntaxResponse
    from google.cloud.language.document import Encoding
    from google.cloud.language.syntax import PartOfSpeech

    name1 = 'R-O-C-K'
    name2 = 'USA'
    content = name1 + ' in the ' + name2
    response = {
        'sentences': [{
            'text': {
                'content': 'R-O-C-K in the USA',
                'beginOffset': -1,
            },
            'sentiment': None,
        }],
        'tokens': [
            {
                'text': {
                    'content': 'R-O-C-K',
                    'beginOffset': -1,
                },
                'partOfSpeech': {
                    'tag': 'NOUN',
                },
                'dependencyEdge': {
                    'headTokenIndex': 0,
                    'label': 'ROOT',
                },
                'lemma': 'R-O-C-K',
            },
            {
                'text': {
                    'content': 'in',
                    'beginOffset': -1,
                },
                'partOfSpeech': {
                    'tag': 'ADP',
                },
                'dependencyEdge': {
                    'headTokenIndex': 0,
                    'label': 'PREP',
                },
                'lemma': 'in',
            },
            {
                'text': {
                    'content': 'the',
                    'beginOffset': -1,
                },
                'partOfSpeech': {
                    'tag': 'DET',
                },
                'dependencyEdge': {
                    'headTokenIndex': 3,
                    'label': 'DET',
                },
                'lemma': 'the',
            },
            {
                'text': {
                    'content': 'USA',
                    'beginOffset': -1,
                },
                'partOfSpeech': {
                    'tag': 'NOUN',
                },
                'dependencyEdge': {
                    'headTokenIndex': 1,
                    'label': 'POBJ',
                },
                'lemma': 'USA',
            },
        ],
        'language': 'en-US',
    }
    client = make_mock_client(response)
    document = self._make_one(client, content)

    syntax_response = document.analyze_syntax()
    self.assertIsInstance(syntax_response, SyntaxResponse)

    tokens = syntax_response.tokens
    self.assertEqual(len(tokens), 4)
    token1 = tokens[0]
    self._verify_token(token1, name1, PartOfSpeech.NOUN, name1)
    token2 = tokens[1]
    self._verify_token(token2, 'in', PartOfSpeech.ADPOSITION, 'in')
    token3 = tokens[2]
    self._verify_token(token3, 'the', PartOfSpeech.DETERMINER, 'the')
    token4 = tokens[3]
    self._verify_token(token4, name2, PartOfSpeech.NOUN, name2)

    # Verify the request.
    expected = self._expected_data(
        content, encoding_type=Encoding.get_default())
    client._connection.api_request.assert_called_once_with(
        path='analyzeSyntax', method='POST', data=expected)
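
# A sketch of the per-token checks made by the _verify_token calls above
# (assumed shape; the class's real helper lives elsewhere in this file).
# Attribute names follow google.cloud.language.syntax.Token, which builds
# text_content, part_of_speech, and lemma from each token's JSON payload.
def _verify_token_sketch(test_case, token, text_content,
                         part_of_speech, lemma):
    from google.cloud.language.syntax import Token

    test_case.assertIsInstance(token, Token)
    test_case.assertEqual(token.text_content, text_content)
    test_case.assertEqual(token.part_of_speech, part_of_speech)
    test_case.assertEqual(token.lemma, lemma)
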
def test_analyze_entity_sentiment(self):
    from google.cloud.language.document import Encoding
    from google.cloud.language.entity import EntityType
    from google.cloud.language.sentiment import Sentiment

    name1 = 'R-O-C-K'
    name2 = 'USA'
    content = name1 + ' in the ' + name2
    wiki2 = 'http://en.wikipedia.org/wiki/United_States'
    salience1 = 0.91391456
    salience2 = 0.086085409
    sentiment = Sentiment(score=0.15, magnitude=42)
    response = {
        'entities': [
            {
                'name': name1,
                'type': EntityType.OTHER,
                'metadata': {},
                'salience': salience1,
                'mentions': [{
                    'text': {
                        'content': name1,
                        'beginOffset': -1,
                    },
                    'type': 'TYPE_UNKNOWN',
                }],
                'sentiment': {
                    'score': 0.15,
                    'magnitude': 42,
                },
            },
            {
                'name': name2,
                'type': EntityType.LOCATION,
                'metadata': {'wikipedia_url': wiki2},
                'salience': salience2,
                'mentions': [{
                    'text': {
                        'content': name2,
                        'beginOffset': -1,
                    },
                    'type': 'PROPER',
                }],
                'sentiment': {
                    'score': 0.15,
                    'magnitude': 42,
                },
            },
        ],
        'language': 'en-US',
    }
    client = make_mock_client(response, api_version='v1beta2')
    document = self._make_one(client, content)

    entity_response = document.analyze_entity_sentiment()
    self.assertEqual(len(entity_response.entities), 2)
    entity1 = entity_response.entities[0]
    self._verify_entity(entity1, name1, EntityType.OTHER,
                        None, salience1, sentiment)
    entity2 = entity_response.entities[1]
    self._verify_entity(entity2, name2, EntityType.LOCATION,
                        wiki2, salience2, sentiment)

    # Verify the request.
    expected = self._expected_data(
        content, encoding_type=Encoding.get_default())
    client._connection.api_request.assert_called_once_with(
        path='analyzeEntitySentiment', method='POST', data=expected)
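
# The Sentiment threaded through the _verify_entity calls above has to be
# compared field by field, since it is built fresh from the test's
# expected values. A minimal sketch of that comparison (assumed attribute
# names, per google.cloud.language.sentiment.Sentiment; the entity's
# sentiment attribute is an assumption based on the v1beta2 payload used
# in this test):
def _verify_entity_sentiment_sketch(test_case, entity, expected_sentiment):
    # v1beta2 entity-level sentiment carries the same score/magnitude
    # shape as document-level sentiment.
    test_case.assertEqual(entity.sentiment.score,
                          expected_sentiment.score)
    test_case.assertEqual(entity.sentiment.magnitude,
                          expected_sentiment.magnitude)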