Ejemplo n.º 1
0
    def test_default_low_maxunicode(self):
        """Check ``Encoding.get_default()`` under patched ``sys.maxunicode``.

        ``sys.maxunicode`` is temporarily replaced with each value in the
        table below, and the default encoding reported while the patch is
        active is compared with the encoding paired with that value.
        """
        import sys
        import mock

        from google.cloud.language.document import Encoding

        # (value patched into sys.maxunicode, encoding expected as default)
        scenarios = (
            (65535, Encoding.UTF16),
            (1114111, Encoding.UTF32),
        )
        for patched_max, expected_encoding in scenarios:
            # patch.dict restores the real sys.maxunicode on exit.
            with mock.patch.dict(sys.__dict__, maxunicode=patched_max):
                self.assertEqual(Encoding.get_default(), expected_encoding)
Ejemplo n.º 2
0
    def _annotate_text_helper(self, include_sentiment, include_entities,
                              include_syntax):
        """Run ``annotate_text()`` against a canned response and verify it.

        A mock client is primed with a response assembled from the
        module-level fixtures, ``annotate_text()`` is called with the three
        flags, and the returned ``Annotations`` plus the outgoing request
        are checked.

        :param include_sentiment: when truthy, the canned response carries a
            ``documentSentiment`` entry and the parsed sentiment is verified;
            otherwise the parsed sentiment must be ``None``.
        :param include_entities: when truthy, exactly one entity is expected
            and verified; otherwise the entity list must be empty.
        :param include_syntax: forwarded to the sentence/token fixtures and
            to the sentence verification.
        """
        from google.cloud.language.document import Annotations
        from google.cloud.language.document import Encoding
        from google.cloud.language.entity import EntityType

        # Assemble the canned API payload served by the mock client.
        token_info, sentences = _get_token_and_sentences(include_syntax)
        entities = _get_entities(include_entities)
        api_payload = {
            'sentences': sentences,
            'tokens': [_make_token_json(*fields) for fields in token_info],
            'entities': entities,
            'language': 'en-US',
        }
        if include_sentiment:
            api_payload['documentSentiment'] = {
                'score': ANNOTATE_SCORE,
                'magnitude': ANNOTATE_MAGNITUDE,
            }

        mock_client = make_mock_client(api_payload)
        doc = self._make_one(mock_client, ANNOTATE_CONTENT)

        result = doc.annotate_text(
            include_syntax=include_syntax,
            include_entities=include_entities,
            include_sentiment=include_sentiment,
        )
        self.assertIsInstance(result, Annotations)

        # Sentences and tokens are always checked.
        self._verify_sentences(include_syntax, result)
        self._verify_tokens(result, token_info)

        # Sentiment is only present when it was requested.
        if not include_sentiment:
            self.assertIsNone(result.sentiment)
        else:
            self._verify_sentiment(
                result.sentiment, ANNOTATE_SCORE, ANNOTATE_MAGNITUDE)

        # Entities: either none at all, or exactly the single fixture entity.
        if not include_entities:
            self.assertEqual(result.entities, [])
        else:
            self.assertEqual(len(result.entities), 1)
            self._verify_entity(
                result.entities[0], ANNOTATE_NAME, EntityType.LOCATION,
                ANNOTATE_WIKI_URL, ANNOTATE_SALIENCE)

        # The client must have issued exactly one annotateText POST.
        expected_body = self._expected_data(
            ANNOTATE_CONTENT,
            encoding_type=Encoding.get_default(),
            extract_sentiment=include_sentiment,
            extract_entities=include_entities,
            extract_syntax=include_syntax,
        )
        api_request = mock_client._connection.api_request
        api_request.assert_called_once_with(
            path='annotateText', method='POST', data=expected_body)
Ejemplo n.º 3
0
    def test_analyze_syntax(self):
        """Check ``analyze_syntax()`` against a canned four-token response.

        A mock client is primed with one sentence and four tokens.  The test
        asserts that the call returns a ``SyntaxResponse``, that it exposes
        four tokens which each pass ``_verify_token`` with the expected
        text, part of speech and lemma, and that exactly one
        ``analyzeSyntax`` POST was issued with the expected request data.
        """
        from google.cloud.language.api_responses import SyntaxResponse
        from google.cloud.language.document import Encoding
        from google.cloud.language.syntax import PartOfSpeech

        def token_json(text, tag, head_index, label):
            # One token entry of the canned response; in this fixture the
            # lemma is always the same string as the token text.
            return {
                'text': {
                    'content': text,
                    'beginOffset': -1,
                },
                'partOfSpeech': {
                    'tag': tag,
                },
                'dependencyEdge': {
                    'headTokenIndex': head_index,
                    'label': label,
                },
                'lemma': text,
            }

        name1 = 'R-O-C-K'
        name2 = 'USA'
        content = name1 + ' in the ' + name2

        # (token text, part-of-speech tag, head token index, edge label)
        token_specs = (
            ('R-O-C-K', 'NOUN', 0, 'ROOT'),
            ('in', 'ADP', 0, 'PREP'),
            ('the', 'DET', 3, 'DET'),
            ('USA', 'NOUN', 1, 'POBJ'),
        )
        response = {
            'sentences': [{
                'text': {
                    'content': 'R-O-C-K in the USA',
                    'beginOffset': -1,
                },
                'sentiment': None,
            }],
            'tokens': [token_json(*spec) for spec in token_specs],
            'language': 'en-US',
        }
        client = make_mock_client(response)
        document = self._make_one(client, content)

        syntax_response = document.analyze_syntax()
        self.assertIsInstance(syntax_response, SyntaxResponse)

        parsed_tokens = syntax_response.tokens
        self.assertEqual(len(parsed_tokens), 4)

        # (expected text and lemma, expected part of speech), in token order.
        expectations = (
            (name1, PartOfSpeech.NOUN),
            ('in', PartOfSpeech.ADPOSITION),
            ('the', PartOfSpeech.DETERMINER),
            (name2, PartOfSpeech.NOUN),
        )
        for position, (text, part_of_speech) in enumerate(expectations):
            self._verify_token(
                parsed_tokens[position], text, part_of_speech, text)

        # Verify the request.
        expected_body = self._expected_data(
            content, encoding_type=Encoding.get_default())
        api_request = client._connection.api_request
        api_request.assert_called_once_with(
            path='analyzeSyntax', method='POST', data=expected_body)
Ejemplo n.º 4
0
    def test_analyze_entity_sentiment(self):
        """Check ``analyze_entity_sentiment()`` against a two-entity response.

        A mock client created with ``api_version='v1beta2'`` is primed with
        two entities that share the same sentiment score and magnitude.  The
        test asserts that both entities come back and pass
        ``_verify_entity``, and that exactly one ``analyzeEntitySentiment``
        POST was issued with the expected request data.
        """
        from google.cloud.language.document import Encoding
        from google.cloud.language.entity import EntityType
        from google.cloud.language.sentiment import Sentiment

        def entity_json(name, entity_type, metadata, salience, mention_type):
            # One entity entry of the canned response, with a single mention
            # whose text is the entity name and a fresh sentiment dict.
            return {
                'name': name,
                'type': entity_type,
                'metadata': metadata,
                'salience': salience,
                'mentions': [{
                    'text': {
                        'content': name,
                        'beginOffset': -1,
                    },
                    'type': mention_type,
                }],
                'sentiment': {
                    'score': 0.15,
                    'magnitude': 42,
                },
            }

        name1 = 'R-O-C-K'
        name2 = 'USA'
        content = name1 + ' in the ' + name2
        wiki2 = 'http://en.wikipedia.org/wiki/United_States'
        salience1 = 0.91391456
        salience2 = 0.086085409
        sentiment = Sentiment(score=0.15, magnitude=42)

        response = {
            'entities': [
                entity_json(name1, EntityType.OTHER, {}, salience1,
                            'TYPE_UNKNOWN'),
                entity_json(name2, EntityType.LOCATION,
                            {'wikipedia_url': wiki2}, salience2, 'PROPER'),
            ],
            'language': 'en-US',
        }
        client = make_mock_client(response, api_version='v1beta2')
        document = self._make_one(client, content)

        entity_response = document.analyze_entity_sentiment()
        self.assertEqual(len(entity_response.entities), 2)

        # (name, entity type, wiki URL passed to the verifier, salience),
        # in response order.
        expectations = (
            (name1, EntityType.OTHER, None, salience1),
            (name2, EntityType.LOCATION, wiki2, salience2),
        )
        for position, (name, kind, wiki_url, salience) in enumerate(
                expectations):
            self._verify_entity(
                entity_response.entities[position], name, kind, wiki_url,
                salience, sentiment)

        # Verify the request.
        expected_body = self._expected_data(
            content, encoding_type=Encoding.get_default())
        api_request = client._connection.api_request
        api_request.assert_called_once_with(
            path='analyzeEntitySentiment', method='POST', data=expected_body)