Example #1
def query(request):
    '''Gets NLP API response'''
    if request.method == 'POST':
        try:
            data = CamelCaseJSONParser().parse(request)
            text = data['nlp_text']
        except KeyError:
            return HttpResponseBadRequest()

        text = check_text(text)

        if text is None:
            return JsonResponse(RESPONSE_EMPTY, status=200)

        client = LanguageServiceClient()

        # pylint: disable=no-member
        document = types.Document(
            content=text,
            type=enums.Document.Type.PLAIN_TEXT)
        # pylint: enable=no-member

        try:
            google_category_response = client.classify_text(document=document)
            google_analysis_response = client.analyze_entities(document=document)
            response = MessageToDict(google_category_response)
            analysis_response = MessageToDict(google_analysis_response)
        except InvalidArgument:
            return JsonResponse(RESPONSE_EMPTY, status=200)

        response = make_response(response, analysis_response)

        return JsonResponse(response, status=200)
    return HttpResponseNotAllowed(['POST'])
Example #2
def main() -> None:
    args = get_args()
    config = get_bunch_config_from_json(args.config)

    comet_experiment = Experiment(
        api_key=config.comet_api_key,
        project_name=config.comet_project_name,
        workspace=config.comet_workspace,
        disabled=not config.use_comet_experiments,
    )
    comet_experiment.set_name(config.experiment_name)
    comet_experiment.log_parameters(config)

    test_tweets = load_test_tweets(config.test_data_path)

    client = LanguageServiceClient()
    result = []
    predictions = np.zeros(len(test_tweets), dtype=np.int32)

    for i, tweet in enumerate(test_tweets):
        start_iter_timestamp = time.time()
        document = types.Document(
            type=enums.Document.Type.PLAIN_TEXT, content=tweet, language="en"
        )

        response = client.analyze_sentiment(document=document)
        response_dict = MessageToDict(response)
        result.append(response_dict)

        prediction_present = bool(response_dict["documentSentiment"])
        if prediction_present:
            # -1, 1 predictions
            predictions[i] = 2 * (response.document_sentiment.score > 0) - 1

        print("iteration", i, "took:", time.time() - start_iter_timestamp, "seconds")

    comet_experiment.log_asset_data(result, name="google_nlp_api_response.json")

    ids = np.arange(1, len(test_tweets) + 1).astype(np.int32)
    predictions_table = np.column_stack((ids, predictions))

    if comet_experiment.disabled:
        save_path = build_save_path(config)
        os.makedirs(save_path)

        formatted_predictions_table = pd.DataFrame(
            predictions_table, columns=["Id", "Prediction"], dtype=np.int32,
        )
        formatted_predictions_table.to_csv(
            os.path.join(save_path, "google_nlp_api_predictions.csv"), index=False
        )
    else:
        comet_experiment.log_table(
            filename="google_nlp_api_predictions.csv",
            tabular_data=predictions_table,
            headers=["Id", "Prediction"],
        )

    percentage_predicted = np.sum(predictions != 0) / predictions.shape[0]
    comet_experiment.log_metric(name="percentage predicted", value=percentage_predicted)
Example #3
def binary_sentiment(text, verbose=True):
    client = LanguageServiceClient()
    # The text to analyze
    document = types.Document(content=text,
                              type=enums.Document.Type.PLAIN_TEXT,
                              language=get_state().language)

    # Detects the sentiment of the text
    sentiment = client.analyze_sentiment(document=document).document_sentiment
    if verbose:
        # print('Text: {}'.format(text))
        print('Sentiment: {}, {}'.format(sentiment.score, sentiment.magnitude))

    return sentiment.score
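
A minimal usage sketch for binary_sentiment, assuming credentials are configured via GOOGLE_APPLICATION_CREDENTIALS and that get_state().language resolves to a supported code such as "en"; per the API, the sentiment score lies in [-1.0, 1.0] and the magnitude is the non-negative overall strength of emotion.

# Hypothetical call; the input text is illustrative.
score = binary_sentiment("The support team resolved my issue quickly.")
if score > 0:
    print("positive")
elif score < 0:
    print("negative")
else:
    print("neutral or mixed")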
Example #4
def _sentiment_analysis(client: LanguageServiceClient, text: str) -> Tuple[str, str]:
    """Detects sentiment in the text and maps it to a verdict and an emoji HTML entity."""
    length = text.count(" ") + 1  # rough word count, used to normalise magnitude

    # instantiates a plain text document
    document = types.Document(
        content=text,
        type=enums.Document.Type.PLAIN_TEXT)

    # detects sentiment in the document
    sentiment = client.analyze_sentiment(document).document_sentiment
    if not sentiment:
        return "", ""
    else:
        score = sentiment.score
        magnitude = sentiment.magnitude

        saturation = magnitude / length > 0.1

        if score <= SCORE_SCALE[0]:
            return "Audiences have apparently negative reviews", "&#x1f620;"
        elif SCORE_SCALE[0] < score <= SCORE_SCALE[1]:
            return "The reviews are somewhat negative", "&#x2639;"
        elif SCORE_SCALE[1] < score < SCORE_SCALE[2]:
            return "The reviews are slightly negative", "&#x1f641;"
        elif SCORE_SCALE[2] <= score <= SCORE_SCALE[3]:
            if saturation:
                return "Audiences have mixed reviews", "&#x1f928;"
            else:
                return "Audiences are neutral", "&#x1f636;"
        elif SCORE_SCALE[3] < score <= SCORE_SCALE[4]:
            return "Reviews are pretty positive~", "&#128578;"
        else:
            return "Reviews are complimentary!", "&#x1f604;"
Example #5
def analyze(content):
    translate_client = TranslateClient()
    language_client = LanguageServiceClient()

    if isinstance(content, six.binary_type):
        content = content.decode('utf-8')

    translation = translate_client.translate(content, target_language='en')

    document = {
        'type': enums.Document.Type.PLAIN_TEXT,
        'content': translation['translatedText']
    }
    response = language_client.analyze_sentiment(document)
    sentiment = response.document_sentiment

    print('Translation: {}'.format(translation['translatedText']))
    print('Score: {}'.format(sentiment.score))
    print('Magnitude: {}'.format(sentiment.magnitude))
Example #6
def classify(text, verbose=True):
    """Classify the input text into categories."""
    text = _preprocess_text(text)
    if not text:
        return
    language_client = LanguageServiceClient()

    document = types.Document(content=text,
                              type=enums.Document.Type.PLAIN_TEXT,
                              language=get_state().language)
    try:
        response = language_client.classify_text(document)
    except Exception as e:
        print(e)
        return
    categories = response.categories

    result = {x.name: x.confidence for x in categories}
    print(result)
    return result
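
A hedged usage sketch for classify; classify_text needs a reasonably long input (very short strings tend to be rejected with InvalidArgument), and the returned category names are paths such as "/Internet & Telecom".

# Illustrative call; actual categories and confidences depend on the API.
result = classify(
    "Google Cloud Natural Language lets developers run sentiment analysis, "
    "entity extraction and content classification over unstructured text."
)
# result might look like {'/Internet & Telecom': 0.87}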
Example #7
class GoogleNaturalLanguageNameParser(object):
    def __init__(self):
        if not settings.TEST:
            self.client = LanguageServiceClient()  # pragma: no cover
        self.type = enums.Entity.Type.PERSON

    def parse(self, content):
        source, text = content
        document = types.Document(content=text, type=enums.Document.Type.PLAIN_TEXT)
        entities = self.client.analyze_entities(document=document).entities
        return [(source, e.name) for e in entities if e.type == self.type]
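
A usage sketch under the snippet's own assumptions (settings.TEST is false so a real client exists, and content is a (source, text) tuple); the exact entities returned depend on the API.

parser = GoogleNaturalLanguageNameParser()
names = parser.parse(("article-42", "Angela Merkel met Emmanuel Macron in Paris."))
# Expected shape, actual output depends on the API:
# [('article-42', 'Angela Merkel'), ('article-42', 'Emmanuel Macron')]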
Example #8
def analyze_entities(text):
    '''Detects entities in the text and returns them with associated metadata, such as each
    entity's type, its salience (relevance to the overall text), and the locations in the text
    that refer to the same entity. Entities are returned in descending order of salience.

    :param text: string
    :return: AnalyzeEntitiesResponse protobuf message
    '''

    client = LanguageServiceClient()

    document = types.Document(content=text,
                              type=enums.Document.Type.PLAIN_TEXT)
    encoding_type = enums.EncodingType.UTF8

    entities = client.analyze_entities(document=document,
                                       encoding_type=encoding_type)

    return entities
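
Because the function returns the raw AnalyzeEntitiesResponse rather than JSON, a typical way to consume it is to iterate its entities field; name, type, salience and mentions are standard fields of the v1 response.

# Sketch of consuming the response; entities arrive sorted by salience.
response = analyze_entities("Larry Page and Sergey Brin founded Google in 1998.")
for entity in response.entities:
    print(entity.name, enums.Entity.Type(entity.type).name, entity.salience)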
Example #9
def get_nlp_api_results(
        client: language.LanguageServiceClient,
        text_content: str) -> language.types.AnalyzeEntitySentimentResponse:
    """Retrieves sentiment/entity information per entity on the whole transcript.

    Args:
        client: google.cloud.language.LanguageServiceClient
        text_content: String containing text of transcribed audio file.

    Returns:
        google.cloud.language.types.AnalyzeEntitySentimentResponse.
    """
    logging.info(f'Starting get_nlp_api_results with {client} and '
                 f'{text_content}')
    try:
        text = language.types.Document(content=text_content, type='PLAIN_TEXT')
        return client.analyze_entity_sentiment(document=text,
                                               encoding_type='UTF32')

    except Exception as e:
        logging.error('Retrieving response from NLP failed.')
        logging.error(e)
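
A sketch of consuming the result; each entity in an AnalyzeEntitySentimentResponse carries an entity-level sentiment with score and magnitude fields, and the function above returns None when the API call fails.

client = language.LanguageServiceClient()
response = get_nlp_api_results(client, "I loved the hotel, but the airline lost my luggage.")
if response is not None:
    for entity in response.entities:
        print(entity.name, entity.sentiment.score, entity.sentiment.magnitude)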
Example #10
def _extract_adjective(client: LanguageServiceClient, comments: List[str]) -> str:
    """Function all to NLP to pull out all adjectives from the text.
    """
    text = "".join(comments)
    # if isinstance(text, six.binary_type):
    #     text = text.encode("utf-8")

    # instantiates a plain text document.
    document = types.Document(
        content=text.encode("utf-8"),
        type=enums.Document.Type.PLAIN_TEXT
    )

    # decompose the text to tokens
    tokens = client.analyze_syntax(document).tokens

    # adjectives are accumulated into a single space-separated string
    adj_list = u""
    for token in tokens:
        # append all adjectives to result
        part_of_speech_tag = enums.PartOfSpeech.Tag(token.part_of_speech.tag)
        if part_of_speech_tag.name == "ADJ":
            adj_list += f"{token.text.content} "
    return adj_list
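
A hedged usage sketch; the helper expects an already constructed LanguageServiceClient, and which tokens receive the ADJ tag is up to the API.

client = LanguageServiceClient()
adjectives = _extract_adjective(client, ["The food was delicious.", "Service was slow but friendly."])
print(adjectives)  # e.g. "delicious slow friendly " (actual tags depend on the API)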
Example #11
    def __init__(self):
        if not settings.TEST:
            self.client = LanguageServiceClient()  # pragma: no cover
        self.type = enums.Entity.Type.PERSON
Example #12
class CloudLanguage:
    def __init__(self, credentials: Optional[Union[str, Path]] = None) -> None:
        if credentials is None:
            self.client = LanguageServiceClient()
        else:
            self.client = LanguageServiceClient.from_service_account_file(filename=credentials)

    def annotate_text_from_string(
            self,
            content: str,
            encoding_type: enums.EncodingType = enums.EncodingType.UTF32,
            retry: Optional[Retry] = None,
            timeout: Optional[float] = None,
            metadata: Optional[Sequence[Tuple[str, str]]] = None,
            language: str = "en",
            document_type: enums.Document.Type = enums.Document.Type.PLAIN_TEXT,
            syntax: bool = True,
            entities: bool = True,
            document_sentiment: bool = True,
            entity_sentiment: bool = True,
            classify: bool = True) -> Dict:
        """Runs annotate_text on a string and returns the response as a dict.

        Args:
            content: Text to annotate.
            encoding_type: Encoding used for offsets in the response.
            retry: Optional retry policy forwarded to the client call.
            timeout: Optional timeout in seconds for the call.
            metadata: Optional additional metadata sent with the request.
            language: Language code of the document.
            document_type: Document type, plain text or HTML.
            syntax: Whether to extract syntax (tokens).
            entities: Whether to extract entities.
            document_sentiment: Whether to extract document-level sentiment.
            entity_sentiment: Whether to extract entity-level sentiment.
            classify: Whether to classify the document into categories.

        Returns:
            The annotate_text response converted to a dictionary.
        """

        features = {"extract_syntax": syntax,
                    "extract_entities": entities,
                    "extract_document_sentiment": document_sentiment,
                    "extract_entity_sentiment": entity_sentiment,
                    "classify_text": classify
                    }

        document = types.Document(content=content, language=language, type=document_type)
        response = self.client.annotate_text(
            document=document,
            features=features,
            encoding_type=encoding_type,
            retry=retry,
            timeout=timeout,
            metadata=metadata)
        return json.loads(MessageToJson(response))

    @staticmethod
    def parse(response: Dict) -> Dict:
        results = CloudLanguage.parse_sentences(response)
        results.update(CloudLanguage.parse_tokens(response))
        results.update(CloudLanguage.parse_document_sentiment(response))
        results.update(CloudLanguage.parse_entity(response))
        results.update(CloudLanguage.parse_categories(response))
        results.update({'language': response['language']})
        return results

    @staticmethod
    def parse_sentences(response: Dict) -> Dict[str, List[Union[str, float]]]:

        results = {
            'sentence_content': [],
            'sentence_begin_offset': [],
            'sentence_sentiment_magnitude': [],
            'sentence_sentiment_score': []
        }
        for sentence in response['sentences']:
            try:
                results['sentence_begin_offset'].append(sentence['text']['beginOffset'])
            except KeyError:
                results['sentence_begin_offset'].append(None)

            results['sentence_content'].append(sentence['text']['content'])
            results['sentence_sentiment_magnitude'].append(sentence['sentiment']['magnitude'])
            results['sentence_sentiment_score'].append(sentence['sentiment']['score'])

        return results

    @staticmethod
    def parse_tokens(response: Dict) -> Mapping[str, List[Union[str, float]]]:
        results = {
            'token_content': [],
            'token_begin_offset': [],
            'token_pos_tag': [],
            'token_pos_number': [],
            'token_dependency_edge_head_token_index': [],
            'token_dependency_edge_label': [],
            'token_lemma': []
        }

        for token in response['tokens']:
            try:
                results['token_begin_offset'].append(token['text']['beginOffset'])
            except KeyError:
                results['token_begin_offset'].append(None)

            try:
                results['token_pos_number'].append(token['partOfSpeech']['number'])
            except KeyError:
                results['token_pos_number'].append(None)

            results['token_content'].append(token['text']['content'])
            results['token_pos_tag'].append(token['partOfSpeech']['tag'])
            results['token_dependency_edge_head_token_index'].append(token['dependencyEdge']['headTokenIndex'])
            results['token_dependency_edge_label'].append(token['dependencyEdge']['label'])
            results['token_lemma'].append(token['lemma'])

        return results

    @staticmethod
    def parse_entity(response: Dict) -> Mapping[str, List[Union[str, float]]]:

        results = {
            'entity_name': [],
            'entity_type': [],
            'entity_salience': [],
            'entity_mention_content': [],
            'entity_mention_begin_offset': [],
            'entity_mention_type': [],
            'entity_sentiment_magnitude': [],
            'entity_sentiment_score': []
        }

        for entity in response['entities']:
            results['entity_name'].append(entity['name'])
            results['entity_type'].append(entity['type'])
            results['entity_salience'].append(entity['salience'])
            for mention in entity['mentions']:
                try:
                    results['entity_mention_begin_offset'].append(mention['text']['beginOffset'])
                except KeyError:
                    results['entity_mention_begin_offset'].append(None)

                try:
                    results['entity_sentiment_magnitude'].append(mention['sentiment']['magnitude'])
                except KeyError:
                    results['entity_sentiment_magnitude'].append(None)

                try:
                    results['entity_sentiment_score'].append(mention['sentiment']['score'])
                except KeyError:
                    results['entity_sentiment_score'].append(None)

                results['entity_mention_content'].append(mention['text']['content'])
                results['entity_mention_type'].append(mention['type'])

        return results

    @staticmethod
    def parse_document_sentiment(response: Dict) -> Dict[str, float]:

        results = {
            'document_sentiment_magnitude': response['documentSentiment']['magnitude'],
            'document_sentiment_score': response['documentSentiment']['score']
        }
        return results

    @staticmethod
    def parse_categories(response: Dict) -> Dict:
        results = {
            'category_name': [],
            'category_confidence': []
        }
        for category in response['categories']:
            results['category_name'].append(category['name'])
            results['category_confidence'].append(category['confidence'])

        return results
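
A brief usage sketch for CloudLanguage; annotate_text_from_string returns the response as a plain dict, and parse assumes every section it reads (sentences, tokens, entities, documentSentiment, categories) is present, which holds when all feature flags are left at their defaults and the text is long enough to be classified.

cl = CloudLanguage()  # or CloudLanguage(credentials="service_account.json") -- path is illustrative
raw = cl.annotate_text_from_string(
    "Google Cloud Natural Language provides sentiment analysis, entity "
    "analysis, syntax analysis and content classification for text documents."
)
flat = CloudLanguage.parse(raw)
print(flat["document_sentiment_score"], flat["category_name"])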
Example #13
    def __init__(self, credentials: Optional[Union[str, Path]] = None) -> None:
        if credentials is None:
            self.client = LanguageServiceClient()
        else:
            self.client = LanguageServiceClient.from_service_account_file(filename=credentials)