def query(request):
    '''Gets NLP API response.'''
    if request.method == 'POST':
        try:
            data = CamelCaseJSONParser().parse(request)
            text = data['nlp_text']
        except KeyError:
            return HttpResponseBadRequest()

        text = check_text(text)
        if text is None:
            return JsonResponse(RESPONSE_EMPTY, status=200)

        client = LanguageServiceClient()
        # pylint: disable=no-member
        document = types.Document(
            content=text,
            type=enums.Document.Type.PLAIN_TEXT)
        # pylint: enable=no-member

        try:
            google_category_response = client.classify_text(document=document)
            google_analysis_response = client.analyze_entities(document=document)
            response = MessageToDict(google_category_response)
            analysis_response = MessageToDict(google_analysis_response)
        except InvalidArgument:
            return JsonResponse(RESPONSE_EMPTY, status=200)

        response = make_response(response, analysis_response)
        return JsonResponse(response, status=200)
    return HttpResponseNotAllowed(['POST'])
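# Hedged usage sketch for query(); the URL is hypothetical, not part of the
# snippet. CamelCaseJSONParser converts incoming camelCase keys to snake_case,
# so a client sends "nlpText" for the view's 'nlp_text' field:
#
#   import requests
#   resp = requests.post('http://localhost:8000/nlp/query/',
#                        json={'nlpText': 'Google Cloud analyzes this text.'})
#   print(resp.status_code, resp.json())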
def main() -> None:
    args = get_args()
    config = get_bunch_config_from_json(args.config)
    comet_experiment = Experiment(
        api_key=config.comet_api_key,
        project_name=config.comet_project_name,
        workspace=config.comet_workspace,
        disabled=not config.use_comet_experiments,
    )
    comet_experiment.set_name(config.experiment_name)
    comet_experiment.log_parameters(config)

    test_tweets = load_test_tweets(config.test_data_path)
    client = LanguageServiceClient()

    result = []
    predictions = np.zeros(len(test_tweets), dtype=np.int32)

    for i, tweet in enumerate(test_tweets):
        start_iter_timestamp = time.time()
        document = types.Document(
            type=enums.Document.Type.PLAIN_TEXT, content=tweet, language="en"
        )
        response = client.analyze_sentiment(document=document)
        response_dict = MessageToDict(response)
        result.append(response_dict)

        prediction_present = bool(response_dict["documentSentiment"])
        if prediction_present:
            # -1, 1 predictions
            predictions[i] = 2 * (response.document_sentiment.score > 0) - 1

        print("iteration", i, "took:", time.time() - start_iter_timestamp, "seconds")

    comet_experiment.log_asset_data(result, name="google_nlp_api_response.json")

    ids = np.arange(1, len(test_tweets) + 1).astype(np.int32)
    predictions_table = np.column_stack((ids, predictions))

    if comet_experiment.disabled:
        save_path = build_save_path(config)
        os.makedirs(save_path)
        formatted_predictions_table = pd.DataFrame(
            predictions_table, columns=["Id", "Prediction"], dtype=np.int32,
        )
        formatted_predictions_table.to_csv(
            os.path.join(save_path, "google_nlp_api_predictions.csv"), index=False
        )
    else:
        comet_experiment.log_table(
            filename="google_nlp_api_predictions.csv",
            tabular_data=predictions_table,
            headers=["Id", "Prediction"],
        )

    percentage_predicted = np.sum(predictions != 0) / predictions.shape[0]
    comet_experiment.log_metric(name="percentage predicted", value=percentage_predicted)
def binary_sentiment(text, verbose=True):
    client = LanguageServiceClient()

    # The text to analyze
    document = types.Document(content=text,
                              type=enums.Document.Type.PLAIN_TEXT,
                              language=get_state().language)

    # Detects the sentiment of the text
    sentiment = client.analyze_sentiment(document=document).document_sentiment

    if verbose:
        # print('Text: {}'.format(text))
        print('Sentiment: {}, {}'.format(sentiment.score, sentiment.magnitude))

    return sentiment.score
def _sentiment_analysis(client: LanguageServiceClient, text: str) -> Tuple[str, str]:
    """Detects sentiment in the text and maps it to a verdict and an emoji."""
    # Approximate word count, used to normalize the sentiment magnitude.
    length = text.count(" ") + 1

    # Instantiates a plain text document.
    document = types.Document(
        content=text,
        type=enums.Document.Type.PLAIN_TEXT)

    # Detects sentiment in the document.
    sentiment = client.analyze_sentiment(document).document_sentiment
    if not sentiment:
        return "", ""

    score = sentiment.score
    magnitude = sentiment.magnitude
    # High magnitude per word signals strong mixed feeling rather than neutrality.
    saturation = magnitude / length > 0.1

    if score <= SCORE_SCALE[0]:
        return "Audiences have apparently negative reviews", "😠"
    elif SCORE_SCALE[0] < score <= SCORE_SCALE[1]:
        return "The reviews are somewhat negative", "☹"
    elif SCORE_SCALE[1] < score < SCORE_SCALE[2]:
        return "The reviews are slightly negative", "🙁"
    elif SCORE_SCALE[2] <= score <= SCORE_SCALE[3]:
        if saturation:
            return "Audiences have mixed reviews", "🤨"
        return "Audiences are neutral", "😶"
    elif SCORE_SCALE[3] < score <= SCORE_SCALE[4]:
        return "The reviews are pretty positive", "🙂"
    else:
        return "The reviews are full of compliments!", "😄"
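# _sentiment_analysis() references a module-level SCORE_SCALE that the snippet
# does not define. A plausible definition, assuming five ascending thresholds
# over the [-1, 1] sentiment score range (the exact values are illustrative,
# not from the original source):
SCORE_SCALE = (-0.6, -0.3, -0.1, 0.1, 0.5)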
def analyze(content):
    translate_client = TranslateClient()
    language_client = LanguageServiceClient()

    if isinstance(content, six.binary_type):
        content = content.decode('utf-8')

    # Translate the input to English before running sentiment analysis.
    translation = translate_client.translate(content, target_language='en')

    # The client also accepts a plain dict in place of types.Document.
    document = {
        'type': enums.Document.Type.PLAIN_TEXT,
        'content': translation['translatedText']
    }
    response = language_client.analyze_sentiment(document)
    sentiment = response.document_sentiment

    print('Translation: {}'.format(translation['translatedText']))
    print('Score: {}'.format(sentiment.score))
    print('Magnitude: {}'.format(sentiment.magnitude))
def classify(text, verbose=True):
    """Classify the input text into categories."""
    text = _preprocess_text(text)
    if not text:
        return

    language_client = LanguageServiceClient()
    document = types.Document(content=text,
                              type=enums.Document.Type.PLAIN_TEXT,
                              language=get_state().language)
    try:
        response = language_client.classify_text(document)
    except Exception as e:
        print(e)
        return

    result = {category.name: category.confidence for category in response.categories}
    if verbose:
        print(result)
    return result
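# Illustrative call to classify(); the Natural Language classifyText endpoint
# rejects very short inputs (documents need roughly twenty tokens or more),
# so short strings typically return no categories or raise InvalidArgument:
#
#   classify("The new graphics card doubles the frame rate of its "
#            "predecessor while drawing noticeably less power, according "
#            "to early benchmark results published this week.")
#   # e.g. {'/Computers & Electronics/Computer Hardware': 0.87}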
class GoogleNaturalLanguageNameParser(object):

    def __init__(self):
        if not settings.TEST:
            self.client = LanguageServiceClient()  # pragma: no cover
        self.type = enums.Entity.Type.PERSON

    def parse(self, content):
        source, text = content
        document = types.Document(content=text,
                                  type=enums.Document.Type.PLAIN_TEXT)
        entities = self.client.analyze_entities(document=document).entities
        return [(source, e.name) for e in entities if e.type == self.type]
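# Hedged usage sketch for the parser above; the source tag is arbitrary and
# the extracted names depend on the live API:
#
#   parser = GoogleNaturalLanguageNameParser()
#   parser.parse(('page-1', 'Ada Lovelace worked with Charles Babbage.'))
#   # e.g. [('page-1', 'Ada Lovelace'), ('page-1', 'Charles Babbage')]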
def analyze_entities(text):
    '''Returns the detected entities, and parameters associated with those
    entities, such as the entity's type, relevance of the entity to the
    overall text, and locations in the text that refer to the same entity.

    Entities are returned in the order (highest to lowest) of their salience
    scores, which reflect their relevance to the overall text.

    :param text: string
    :return: AnalyzeEntitiesResponse protobuf
    '''
    client = LanguageServiceClient()
    document = types.Document(content=text,
                              type=enums.Document.Type.PLAIN_TEXT)
    encoding_type = enums.EncodingType.UTF8
    response = client.analyze_entities(document=document,
                                       encoding_type=encoding_type)
    return response
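# analyze_entities() returns an AnalyzeEntitiesResponse protobuf, not JSON, so
# callers iterate its .entities field (a sketch using the v1 client's enums):
#
#   response = analyze_entities('Marie Curie won two Nobel Prizes.')
#   for entity in response.entities:
#       print(entity.name,
#             enums.Entity.Type(entity.type).name,
#             entity.salience)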
def get_nlp_api_results(
        client: language.LanguageServiceClient,
        text_content: str
) -> Optional[language.types.AnalyzeEntitySentimentResponse]:
    """Retrieves sentiment/entity information per entity on the whole transcript.

    Args:
        client: google.cloud.language.LanguageServiceClient
        text_content: String containing text of transcribed audio file.

    Returns:
        google.cloud.language.types.AnalyzeEntitySentimentResponse, or None if
        the request failed.
    """
    logging.info(f'Starting get_nlp_api_results with {client} and '
                 f'{text_content}')
    try:
        text = language.types.Document(content=text_content,
                                       type='PLAIN_TEXT')
        return client.analyze_entity_sentiment(document=text,
                                               encoding_type='UTF32')
    except Exception as e:
        logging.error('Retrieving response from NLP failed.')
        logging.error(e)
        return None
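# This snippet is the only one here that keeps the module prefix, so it
# assumes "from google.cloud import language" (v1 client). A minimal wiring
# sketch:
#
#   from google.cloud import language
#   client = language.LanguageServiceClient()
#   response = get_nlp_api_results(client, 'The keynote was inspiring.')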
def _extract_adjective(client: LanguageServiceClient, comments: List[str]) -> str:
    """Calls the NLP API to pull out all adjectives from the text."""
    # Join with a space so words at comment boundaries do not run together.
    text = " ".join(comments)

    # Instantiates a plain text document.
    document = types.Document(
        content=text.encode("utf-8"),
        type=enums.Document.Type.PLAIN_TEXT
    )

    # Decomposes the text into tokens.
    tokens = client.analyze_syntax(document).tokens

    # Appends every adjective to the result string.
    adjectives = ""
    for token in tokens:
        part_of_speech_tag = enums.PartOfSpeech.Tag(token.part_of_speech.tag)
        if part_of_speech_tag.name == "ADJ":
            adjectives += f"{token.text.content} "
    return adjectives
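# Illustrative call, assuming an authenticated client; the review fragments
# and the output are made up:
#
#   client = LanguageServiceClient()
#   _extract_adjective(client, ["Great pacing and a gorgeous score.",
#                               "A dull second act."])
#   # e.g. "Great gorgeous dull "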
class CloudLanguage:

    def __init__(self, credentials: Optional[Union[str, Path]] = None) -> None:
        if credentials is None:
            self.client = LanguageServiceClient()
        else:
            self.client = LanguageServiceClient.from_service_account_file(filename=credentials)

    def annotate_text_from_string(
            self,
            content: str,
            encoding_type: str = enums.EncodingType.UTF32,
            retry: Optional[Retry] = None,
            timeout: Optional[float] = None,
            metadata: Optional[Sequence[Tuple[str, str]]] = None,
            language: str = "en",
            document_type: str = enums.Document.Type.PLAIN_TEXT,
            syntax: bool = True,
            entities: bool = True,
            document_sentiment: bool = True,
            entity_sentiment: bool = True,
            classify: bool = True) -> Dict:
        """Runs the requested annotation features over the given text.

        Args:
            content: Text to annotate.
            encoding_type: Encoding used to compute text offsets.
            retry: Optional retry policy for the request.
            timeout: Optional request timeout in seconds.
            metadata: Optional metadata attached to the request.
            language: Language of the document.
            document_type: Document type (plain text or HTML).
            syntax: Whether to extract syntax.
            entities: Whether to extract entities.
            document_sentiment: Whether to extract document sentiment.
            entity_sentiment: Whether to extract entity sentiment.
            classify: Whether to classify the document into categories.

        Returns:
            The API response converted to a dictionary.
        """
        features = {
            "extract_syntax": syntax,
            "extract_entities": entities,
            "extract_document_sentiment": document_sentiment,
            "extract_entity_sentiment": entity_sentiment,
            "classify_text": classify
        }
        document = types.Document(content=content, language=language, type=document_type)
        response = self.client.annotate_text(
            document=document,
            features=features,
            encoding_type=encoding_type,
            retry=retry,
            timeout=timeout,
            metadata=metadata)
        return json.loads(MessageToJson(response))

    @staticmethod
    def parse(response: Dict) -> Dict:
        results = CloudLanguage.parse_sentences(response)
        results.update(CloudLanguage.parse_tokens(response))
        results.update(CloudLanguage.parse_document_sentiment(response))
        results.update(CloudLanguage.parse_entity(response))
        results.update(CloudLanguage.parse_categories(response))
        results.update({'language': response['language']})
        return results

    @staticmethod
    def parse_sentences(response: Dict) -> Dict[str, List[Union[str, float]]]:
        results = {
            'sentence_content': [],
            'sentence_begin_offset': [],
            'sentence_sentiment_magnitude': [],
            'sentence_sentiment_score': []
        }
        for sentence in response['sentences']:
            try:
                results['sentence_begin_offset'].append(sentence['text']['beginOffset'])
            except KeyError:
                results['sentence_begin_offset'].append(None)
            results['sentence_content'].append(sentence['text']['content'])
            results['sentence_sentiment_magnitude'].append(sentence['sentiment']['magnitude'])
            results['sentence_sentiment_score'].append(sentence['sentiment']['score'])
        return results

    @staticmethod
    def parse_tokens(response: Dict) -> Mapping[str, List[Union[str, float]]]:
        results = {
            'token_content': [],
            'token_begin_offset': [],
            'token_pos_tag': [],
            'token_pos_number': [],
            'token_dependency_edge_head_token_index': [],
            'token_dependency_edge_label': [],
            'token_lemma': []
        }
        for token in response['tokens']:
            try:
                results['token_begin_offset'].append(token['text']['beginOffset'])
            except KeyError:
                results['token_begin_offset'].append(None)
            try:
                results['token_pos_number'].append(token['partOfSpeech']['number'])
            except KeyError:
                results['token_pos_number'].append(None)
            results['token_content'].append(token['text']['content'])
            results['token_pos_tag'].append(token['partOfSpeech']['tag'])
            results['token_dependency_edge_head_token_index'].append(token['dependencyEdge']['headTokenIndex'])
            results['token_dependency_edge_label'].append(token['dependencyEdge']['label'])
            results['token_lemma'].append(token['lemma'])
        return results

    @staticmethod
    def parse_entity(response: Dict) -> Mapping[str, List[Union[str, float]]]:
        results = {
            'entity_name': [],
            'entity_type': [],
            'entity_salience': [],
            'entity_mention_content': [],
            'entity_mention_begin_offset': [],
            'entity_mention_type': [],
            'entity_sentiment_magnitude': [],
            'entity_sentiment_score': []
        }
        for entity in response['entities']:
            results['entity_name'].append(entity['name'])
            results['entity_type'].append(entity['type'])
            results['entity_salience'].append(entity['salience'])
            for mention in entity['mentions']:
                try:
                    results['entity_mention_begin_offset'].append(mention['text']['beginOffset'])
                except KeyError:
                    results['entity_mention_begin_offset'].append(None)
                try:
                    results['entity_sentiment_magnitude'].append(mention['sentiment']['magnitude'])
                except KeyError:
                    results['entity_sentiment_magnitude'].append(None)
                try:
                    results['entity_sentiment_score'].append(mention['sentiment']['score'])
                except KeyError:
                    results['entity_sentiment_score'].append(None)
                results['entity_mention_content'].append(mention['text']['content'])
                results['entity_mention_type'].append(mention['type'])
        return results

    @staticmethod
    def parse_document_sentiment(response: Dict) -> Dict[str, float]:
        results = {
            'document_sentiment_magnitude': response['documentSentiment']['magnitude'],
            'document_sentiment_score': response['documentSentiment']['score']
        }
        return results

    @staticmethod
    def parse_categories(response: Dict) -> Dict:
        results = {
            'category_name': [],
            'category_confidence': []
        }
        for category in response['categories']:
            results['category_name'].append(category['name'])
            results['category_confidence'].append(category['confidence'])
        return results
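# End-to-end sketch for CloudLanguage, assuming application-default
# credentials. Note that parse() expects every feature to be present in the
# response, so when a feature is disabled (e.g. classify=False for short
# texts) call the individual parse_* helpers instead:
#
#   nlp = CloudLanguage()
#   raw = nlp.annotate_text_from_string(
#       'Short texts cannot be classified, so skip that feature.',
#       classify=False)
#   print(CloudLanguage.parse_document_sentiment(raw))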