def analyze_sentiment(text):
    """Return the coarse sentiment label for *text*.

    Args:
        text: The content to be analyzed.

    Returns:
        Whatever ``assign_sentiment`` maps the document-level score to.
    """
    # Prefer the bundled service-account key; when it is missing (e.g. when
    # running on GCE) fall back to the Compute Engine default credentials.
    try:
        client = language_v1.LanguageServiceClient().from_service_account_json(
            "./service_account_key.json")
    except FileNotFoundError:
        gce_credentials = compute_engine.Credentials()
        client = language_v1.LanguageServiceClient(credentials=gce_credentials)

    document = {
        "content": text,
        "type": enums.Document.Type.PLAIN_TEXT,
        "language": "en",
    }
    response = client.analyze_sentiment(
        document, encoding_type=enums.EncodingType.UTF8)

    # Get overall sentiment and translate the numeric score into a label.
    return assign_sentiment(response.document_sentiment.score)
def extract_entities(self, text):
    """Extract entities from *text* and accumulate them on the instance.

    Entities whose Google type has a mapping in ``self.CODEC`` are appended
    to ``self.entities`` as ``{'text': ..., 'type': ...}`` dicts.

    Args:
        text: The content to analyze.

    Returns:
        The (shared) ``self.entities`` list.
    """
    client = language_v1.LanguageServiceClient()
    document = {
        "content": text,
        "type": enums.Document.Type.PLAIN_TEXT,
        "language": "en",
    }
    # Available values: NONE, UTF8, UTF16, UTF32
    google_response = client.analyze_entities(document, enums.EncodingType.UTF8)

    # Google extraction: keep only the entity types CODEC can translate.
    for entity in google_response.entities:
        type_name = enums.Entity.Type(entity.type).name
        if type_name in self.CODEC:
            self.entities.append({
                'text': entity.name,
                'type': self.CODEC[type_name],
            })
            # self.text = self.text.replace(entity.name, ' ', 1)
    return self.entities
def analyze_text_sentiment(text):
    """Print document-level sentiment for *text*; return per-sentence data."""
    client = language.LanguageServiceClient()
    doc = language.Document(content=text,
                            type_=language.Document.Type.PLAIN_TEXT)
    response = client.analyze_sentiment(document=doc)

    overall = response.document_sentiment
    results = {
        "text": text,
        "score": f"{overall.score:.1%}",
        "magnitude": f"{overall.magnitude:.1%}",
    }
    for key, value in results.items():
        print(f"{key:10}: {value}")

    # Get sentiment for all sentences in the document
    return [
        {
            "text": sentence.text.content,
            "sentiment score": sentence.sentiment.score,
            "sentiment magnitude": sentence.sentiment.magnitude,
        }
        for sentence in response.sentences
    ]
def __init__(self, language='en'):
    """Configure credentials and build a Natural Language client.

    Args:
        language: Language code used for subsequent requests (default 'en').
    """
    # Point the Google SDK at the project's service-account key file.
    os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "nlp/google.json"
    self.client = language_v1.LanguageServiceClient()
    self.type_ = enums.Document.Type.PLAIN_TEXT
    self.language = language
    self.encoding_type = enums.EncodingType.UTF8
def sample_analyze_sentiment(content):
    """Analyze sentiment of *content* and print the document-level result.

    Args:
        content: Text to analyze; ``bytes`` input is decoded as UTF-8.

    Returns:
        0 on completion (legacy return convention kept for callers).
    """
    client = language_v1.LanguageServiceClient()
    if isinstance(content, six.binary_type):
        content = content.decode('utf-8')
    type_ = enums.Document.Type.PLAIN_TEXT
    document = {'type': type_, 'content': content}
    response = client.analyze_sentiment(document)
    sentiment = response.document_sentiment
    print("analyze by the whole text")
    # fixed: the original message contained a stray quote ("'-''")
    print(
        "Score: '+' means positive comments, Score: '-' means negative comments "
    )
    print("the higher of Magnitude, the stronger feeling or expression ")
    print(
        '-------------------------------------------------------------------------'
    )
    print('Score: {}'.format(sentiment.score))
    print('Magnitude: {}'.format(sentiment.magnitude) + '\n')
    return 0
def create_text():
    """Flask endpoint: create a MoodText record from the request JSON.

    Runs Google sentiment analysis on the submitted content, stores the
    score (scaled by 255) and a timestamp, then returns the new record
    as JSON with HTTP 201.
    """
    payload = request.get_json() or {}
    text = MoodText()
    text.from_dict(payload)

    # Credentials are delivered as raw service-account JSON in the env var.
    raw_credentials = os.environ.get('GOOGLE_APPLICATION_CREDENTIALS')
    account_info = json.loads(raw_credentials)
    credentials = service_account.Credentials.from_service_account_info(
        account_info)
    client = language_v1.LanguageServiceClient(credentials=credentials)

    if isinstance(text.content, six.binary_type):
        text.content = text.content.decode('utf-8')
    document = {
        'type': enums.Document.Type.PLAIN_TEXT,
        'content': text.content,
    }
    sentiment = client.analyze_sentiment(document).document_sentiment

    # Scale the [-1, 1] score by 255 before persisting.
    text.sentiment = round(sentiment.score * 255)
    text.date = datetime.now()
    db.session.add(text)
    db.session.commit()

    response = jsonify(text.to_dict())
    response.status_code = 201
    return response
def analyze_entity_sentiment(text_content):
    """ Analyzing Entity Sentiment in a String

    Args:
      text_content The text content to analyze

    Returns:
      A list with one entry per entity, each a list of
      (field-name, value) tuples.
    """
    client = language_v1.LanguageServiceClient()
    document = {
        "content": text_content,
        "type": enums.Document.Type.PLAIN_TEXT,
        "language": "en",
    }
    response = client.analyze_entity_sentiment(
        document, encoding_type=enums.EncodingType.UTF8)

    all_entities = []
    for entity in response.entities:
        entity_sentiment = entity.sentiment
        all_entities.append([
            ('name', entity.name),
            ('type', enums.Entity.Type(entity.type).name),
            ('salience', entity.salience),
            ('wikipedia_url', entity.metadata.get('wikipedia_url', '-')),
            ('mid', entity.metadata.get('mid', '-')),
            ('sentiment', entity_sentiment.score),
            ('magnitude', entity_sentiment.magnitude),
        ])
    return all_entities
def analyze__comment_nlp(self, comment: Comment) -> float:
    """Return the Google NL sentiment score for a Reddit comment.

    Args:
        comment: The comment whose ``content`` is analyzed.

    Returns:
        The document-level sentiment score in [-1.0, 1.0].
    """
    client = language_v1.LanguageServiceClient()
    # Get comment content
    text_content = comment.content
    type_ = language_v1.Document.Type.PLAIN_TEXT
    # If not specified, the client will autodetect the language.
    # Assume English for RPI Reddit comments for now.
    # fixed: was misspelled "langauge", leaving `language` undefined below
    language = "en"
    document = {
        "content": text_content,
        "type_": type_,
        "language": language,
    }
    # Set encoding type.
    # fixed: "EncodyingType"/"encodying_type" typos raised AttributeError
    # and left `encoding_type` undefined — the function could never run.
    encoding_type = language_v1.EncodingType.UTF8
    # Get the sentiment of the content
    response = client.analyze_sentiment(request={
        'document': document,
        'encoding_type': encoding_type
    })
    return response.document_sentiment.score
def analyze_text(text):
    """Return the document-level sentiment for *text*."""
    client = language_v1.LanguageServiceClient()
    doc = language_v1.Document(
        content=text, type_=language_v1.Document.Type.PLAIN_TEXT)
    response = client.analyze_sentiment(request={'document': doc})
    return response.document_sentiment
def determineCategory(text_content):
    """Classify *text_content* and return the top category name.

    Args:
        text_content: Text to classify (the API requires >= 20 words).

    Returns:
        The first category name, or ``None`` when the API returns no
        categories (the original raised IndexError in that case).
    """
    from google.cloud import language_v1
    client = language_v1.LanguageServiceClient()
    # Available types: PLAIN_TEXT, HTML
    type_ = language_v1.Document.Type.PLAIN_TEXT
    language = "en"
    document = {"content": text_content, "type_": type_, "language": language}
    response = client.classify_text(request={'document': document})
    categories = []
    # Loop through classified categories returned from the API
    for category in response.categories:
        # Get the name of the category representing the document.
        # See the predefined taxonomy of categories:
        # https://cloud.google.com/natural-language/docs/categories
        print(u"Category name: {}".format(category.name))
        # Get the confidence. Number representing how certain the classifier
        # is that this category represents the provided text.
        print(u"Confidence: {}".format(category.confidence))
        categories.append(category.name)
    # fixed: guard against an empty result instead of raising IndexError
    return categories[0] if categories else None
def sample_analyze_sentiment(text_content):
    """
    This function Analyzes Sentiment in a String
    :param text_content: The string text content to analyze
    :return: sentiment gathered from the string passed in
    """
    client = language_v1.LanguageServiceClient()

    # text_content = 'I am so happy and joyful.'
    # Available types: PLAIN_TEXT, HTML
    # The language field is optional; if not specified it is auto-detected.
    # For list of supported languages:
    # https://cloud.google.com/natural-language/docs/languages
    document = {
        "content": text_content,
        "type_": language_v1.Document.Type.PLAIN_TEXT,
        "language": "en",
    }

    # Available values: NONE, UTF8, UTF16, UTF32
    response = client.analyze_sentiment(request={
        'document': document,
        'encoding_type': language_v1.EncodingType.UTF8,
    })

    # Get overall sentiment of the input document
    doc_sentiment = response.document_sentiment
    print(u"Document sentiment score: {}".format(doc_sentiment.score))
    print(u"Document sentiment magnitude: {}".format(doc_sentiment.magnitude))
    return doc_sentiment.score
def store_sentiment_of_article(formatted_text, filename):
    """Analyze article sentiment and persist the result as JSON.

    Args:
        formatted_text: Article text/HTML; tags are stripped first.
        filename: ``pathlib.Path`` destination for the sentiment JSON.
    """
    # remove html tags
    text = remove_html_tags(formatted_text)
    if filename.exists():
        # fixed: the f-string never interpolated the file name
        logger.info(f'File {filename} already exists')
    else:
        try:
            # set environment variable for authentication
            os.environ[
                "GOOGLE_APPLICATION_CREDENTIALS"] = r"C:\Users\di872\Desktop\renewrs_recsys\renewrs-e50f271b94e7.json"
            # run sentiment analysis request on text
            client = language_v1.LanguageServiceClient()
            document = language_v1.Document(
                content=text, type_=language_v1.Document.Type.PLAIN_TEXT)
            annotations = client.analyze_sentiment(
                request={'document': document})
            json_data = json.loads(annotations.__class__.to_json(annotations))
            # create folder to save sentiment in
            filename.parent.mkdir(parents=True, exist_ok=True)
            # save sentiment
            with open(str(filename), "w") as file:
                json.dump(json_data, file, indent=4)
        except Exception as e:
            # fixed: log both the target file and the actual error
            # (the original logged a constant string and dropped `e`)
            logger.error(f'Failure for {filename}: {e}')
def get_sentiment_score_using_google(text_list):
    """Score each text in *text_list* with Google NL sentiment.

    Failed items are skipped (best-effort batch processing).

    Args:
        text_list: Iterable of strings to analyze.

    Returns:
        (texts, text_sentiment, text_score): successfully analyzed texts,
        their 'positive'/'negative' labels, and the raw scores.
    """
    client = language_v1.LanguageServiceClient()
    texts = []
    text_sentiment = []
    text_score = []
    for text in tqdm(text_list):
        try:
            document = language_v1.Document(
                content=text,
                type_=language_v1.Document.Type.PLAIN_TEXT,
                language='en')
            sentiment = client.analyze_sentiment(request={
                'document': document
            }).document_sentiment
            texts.append(text)
            text_score.append(sentiment.score)
            # Note: score == 0 is labelled 'negative' here (original quirk).
            sentiment_ = 'positive' if sentiment.score > 0 else 'negative'
            text_sentiment.append(sentiment_)
        except Exception:
            # fixed: was a bare `except: pass`, which also swallowed
            # KeyboardInterrupt/SystemExit; keep the best-effort skip but
            # only for ordinary errors.
            continue
    return texts, text_sentiment, text_score
def test_annotate_text(self):
    """annotate_text sends the expected request and returns the stub reply."""
    # Setup Expected Response
    language = 'language-1613589672'
    expected_response = language_service_pb2.AnnotateTextResponse(
        language=language)

    # Mock the API response
    channel = ChannelStub(responses=[expected_response])
    patch = mock.patch('google.api_core.grpc_helpers.create_channel')
    with patch as create_channel:
        create_channel.return_value = channel
        client = language_v1.LanguageServiceClient()

    # Setup Request
    document = {}
    features = {}
    response = client.annotate_text(document, features)
    assert expected_response == response

    # Exactly one RPC must have gone over the stubbed channel.
    assert len(channel.requests) == 1
    expected_request = language_service_pb2.AnnotateTextRequest(
        document=document, features=features)
    actual_request = channel.requests[0][1]
    assert expected_request == actual_request
def analyze_entities(text_content):
    """ Analyzing Entities from a String

    Args:
      text_content The text content to analyze

    Prints each entity with its salience and appends
    [mention-text, mention-type] pairs to the module-level ``strBase`` list.
    """
    # Set connection to client as variable
    credentials = service_account.Credentials.from_service_account_file(
        r"C:\Users\timfl\Documents\GoogleCloudKeys\MyFirstProject-e85779938beb.json"
    )
    client = language_v1.LanguageServiceClient(credentials=credentials)

    # Build the document. If language is not set, it will be detected:
    # https://cloud.google.com/natural-language/docs/languages
    document = {
        "content": text_content,
        "type_": language_v1.Document.Type.PLAIN_TEXT,
        "language": "en",
    }

    # Pass in client request with defined specifications
    response = client.analyze_entities(request={
        'document': document,
        'encoding_type': language_v1.EncodingType.UTF8,
    })

    # Loop through entities returned from the API
    for entity in response.entities:
        # Entity type would be language_v1.Entity.Type(entity.type_).name
        # (PERSON, LOCATION, ADDRESS, NUMBER, etc.) if needed.
        print(entity.name)
        # Get salience score in [0, 1.0] range
        print(u"Salience score: {}".format(entity.salience))
        # Collect every mention as [text, mention-type].
        for mention in entity.mentions:
            pair = [
                u"{}".format(mention.text.content),
                # Mention type, e.g. PROPER for proper noun
                u"{}".format(
                    language_v1.EntityMention.Type(mention.type_).name),
            ]
            # NOTE(review): strBase is presumably a module-level list defined
            # elsewhere in this file — confirm before refactoring.
            strBase.append(pair)
    print(strBase)
def sentiment_analysis():
    """Score configured Twitter accounts' tweets with Google NL sentiment.

    Returns:
        List of (tweet_id, text, score, magnitude) tuples.
    """
    client = language_v1.LanguageServiceClient()
    config = get_config()
    results = []

    # get all keywords
    keywords = []
    for currency in config["currencies"].values():
        keywords.extend(currency["keywords"])

    # analyze tweets
    tweets_by_user = fetch_tweets(config["twitter"]["accounts"], keywords)
    for tweets in tweets_by_user.values():
        for tweet in tweets:
            tweet_id = tweet["id"]
            text = u"{}".format(tweet["text"])
            doc = language_v1.Document(
                content=text, type_=language_v1.Document.Type.PLAIN_TEXT)
            sent = client.analyze_sentiment(
                request={"document": doc}).document_sentiment
            """
            print("Text: {}".format(text))
            print("Sentiment: {:.2%}, {:.2%}".format(
                sent.score, sent.magnitude))
            """
            results.append((tweet_id, text, sent.score, sent.magnitude))
    return results
def analyze_text_sentiment_rest(text):
    """Print document sentiment for *text*; return indexed per-sentence data."""
    client = language.LanguageServiceClient()
    doc = language.Document(content=text,
                            type_=language.Document.Type.PLAIN_TEXT)
    response = client.analyze_sentiment(document=doc)

    overall = response.document_sentiment
    results = {
        "text": text,
        "score": f"{overall.score:.1%}",
        "magnitude": f"{overall.magnitude:.1%}",
    }
    for key, value in results.items():
        print(f"{key:10}: {value}")

    # One record per sentence, with a 1-based index.
    sentence_sentiment = []
    for index, sentence in enumerate(response.sentences, start=1):
        sentence_sentiment.append({
            "text": sentence.text.content,
            "score": sentence.sentiment.score,
            "magnitude": sentence.sentiment.magnitude,
            "index": index,
        })
    return sentence_sentiment
def sample_analyze_sentiment(text_content, array_name):
    """ Analyzing Sentiment in a String

    Args:
      text_content The text content to analyze
      array_name   Mutable mapping; the document score is stored under
                   the "sentiments" key (as a string).
    """
    l_client = language_v1.LanguageServiceClient()
    document = {
        "content": text_content,
        "type": enums.Document.Type.PLAIN_TEXT,
        "language": "en",
    }
    # Available values: NONE, UTF8, UTF16, UTF32
    response = l_client.analyze_sentiment(
        document, encoding_type=enums.EncodingType.UTF8)

    # Get overall sentiment of the input document
    doc_sentiment = response.document_sentiment
    print(u"Document sentiment score: {}".format(doc_sentiment.score))
    array_name["sentiments"] = format(doc_sentiment.score)
    print(u"Document sentiment magnitude: {}".format(doc_sentiment.magnitude))

    # Get sentiment for all sentences in the document
    for sentence in response.sentences:
        print(u"Sentence text: {}".format(sentence.text.content))
        print(u"Sentence sentiment score: {}".format(sentence.sentiment.score))
        print(u"Sentence sentiment magnitude: {}".format(
            sentence.sentiment.magnitude))

    print(u"Language of the text: {}".format(response.language))
def test_analyze_sentiment(self):
    """analyze_sentiment sends the expected request and returns the stub reply."""
    # Setup Expected Response
    language = "language-1613589672"
    expected_response = language_service_pb2.AnalyzeSentimentResponse(
        language=language)

    # Mock the API response
    channel = ChannelStub(responses=[expected_response])
    patch = mock.patch("google.api_core.grpc_helpers.create_channel")
    with patch as create_channel:
        create_channel.return_value = channel
        client = language_v1.LanguageServiceClient()

    # Setup Request
    document = {}
    response = client.analyze_sentiment(document)
    assert expected_response == response

    # Exactly one RPC must have gone over the stubbed channel.
    assert len(channel.requests) == 1
    expected_request = language_service_pb2.AnalyzeSentimentRequest(
        document=document)
    actual_request = channel.requests[0][1]
    assert expected_request == actual_request
def sentiment_analysis(sentiment_text):
    """Classify *sentiment_text* as Positive/Neutral/Mixed/Negative.

    Args:
        sentiment_text: Text to analyze.

    Returns:
        One of "Positive", "Neutral", "Mixed", "Negative".
    """
    from google.cloud import language_v1
    from google.cloud.language_v1 import enums

    # Module Setup
    client = language_v1.LanguageServiceClient()
    type_ = enums.Document.Type.PLAIN_TEXT
    language = "en"
    document = {"content": sentiment_text, "type": type_, "language": language}
    encoding_type = enums.EncodingType.UTF8

    # Get overall document sentiment
    response = client.analyze_sentiment(document, encoding_type=encoding_type)
    document_sentiment_score = response.document_sentiment.score
    print(f"Document sentiment score: {document_sentiment_score}")
    document_sentiment_magnitude = response.document_sentiment.magnitude
    # fixed: this print was labelled "score" while showing the magnitude
    print(f"Document sentiment magnitude: {document_sentiment_magnitude}")

    # Interpret the results.
    # fixed: the original tested `== 0.1` and `== 0.0` exactly, so scores
    # such as 0.15 fell through every branch and the function returned None;
    # the branches now cover the whole [-1, 1] range.
    if document_sentiment_score >= 0.2:
        label = "Positive"
    elif document_sentiment_score > 0.0:
        label = "Neutral"
    elif document_sentiment_score == 0.0:
        label = "Mixed"
    else:
        label = "Negative"
    print(f"Sentiment returned is: {label}")
    return label
def __init__(self): """ create an instance of the class """ # Instantiates a client self._client = language_v1.LanguageServiceClient()
def sample_analyze_sentiment(text_content):
    """ Analyzing Sentiment in a String

    Args:
      text_content The text content to analyze

    Returns:
      The raw AnalyzeSentiment API response.
    """
    # Installation notes:
    # - pip3 install google-cloud-language
    # - Enable Google Cloud Natural Language API
    # Credentials may alternatively be supplied by setting
    # GOOGLE_APPLICATION_CREDENTIALS to a service-account key path.
    client = language_v1.LanguageServiceClient()

    # text_content = 'I am so happy and joyful.'
    # Available types: PLAIN_TEXT, HTML.  The language is optional; if not
    # specified it is automatically detected. Supported languages:
    # https://cloud.google.com/natural-language/docs/languages
    document = {
        "content": text_content,
        "type": enums.Document.Type.PLAIN_TEXT,
        "language": "en",
    }

    # Available values: NONE, UTF8, UTF16, UTF32
    return client.analyze_sentiment(
        document, encoding_type=enums.EncodingType.UTF8)
def get_sentiment(instances_content):
    """Analyzing Sentiment for a batch of strings.

    Args:
        instances_content: Iterable of text items to score.

    Returns:
        List of document sentiment scores; -1 where a score was missing.
        Items that fail with an API/retry/value error are skipped (logged).
    """
    scores = []
    client = language_v1.LanguageServiceClient()
    encoding_type = enums.EncodingType.UTF8
    language = 'en'
    type_ = enums.Document.Type.PLAIN_TEXT
    for content in instances_content:
        # fixed: the original used the Python 2-only name `unicode`,
        # which raises NameError on Python 3; normalize to str instead
        # (consistent with the other analyzers in this module).
        if isinstance(content, bytes):
            content = content.decode('utf-8')
        else:
            content = str(content)
        document = {'content': content, 'type': type_, 'language': language}
        try:
            response = client.analyze_sentiment(
                document,
                encoding_type=encoding_type,
                timeout=30,
                retry=retry.Retry(deadline=60))
            # Get overall sentiment of the input document.
            # NOTE: a score of exactly 0.0 is falsy and treated as
            # "missing" here — preserved from the original behavior.
            if response.document_sentiment.score:
                scores.append(response.document_sentiment.score)
            else:
                scores.append(-1)
                logging.error(
                    'Document sentiment score not found for {}'.format(
                        content))
        except exceptions.GoogleAPICallError as e:
            logging.exception(e)
        except exceptions.RetryError as e:
            logging.exception(e)
        except ValueError as e:
            logging.exception(e)
    return scores
def tweet_sentiment(name):
    """Return blended sentiment scores for a Twitter user's tweets.

    Each score combines the Google NL score (rescaled from [-1, 1] to
    [0, 50]) with a local model prediction (scaled to [0, 50]), formatted
    to one decimal place.
    """
    client = language_v1.LanguageServiceClient()
    tweet_list, df = twitter_user(name)

    score_list = []
    for tweet in tweet_list:
        sentence_str = str(tweet)
        document = language_v1.Document(
            content=sentence_str,
            type_=language_v1.Document.Type.PLAIN_TEXT)
        sentiment = client.analyze_sentiment(request={
            'document': document
        }).document_sentiment
        blended = ((sentiment.score + 1) * 25
                   + sentiment_predict(sentence_str) * 50)
        score_list.append(format(blended, ".1f"))
    # print("Text: {}".format(sentence_str.strip("['']")))
    # print("Sentiment: (score) {} (magnitude) {}".format(sentiment.score, sentiment.magnitude))
    # return sentiment.score, sentence_str.strip("['']")
    # return score_list, sentiment.magnitude
    return score_list
def sample_analyze_entities(text_content):
    """
    Analyzing Entities in a String

    Args:
      text_content The text content to analyze
    """
    client = language_v1.LanguageServiceClient()

    #text_content = 'California is a state.'

    # Available types: PLAIN_TEXT, HTML
    type_ = language_v1.Document.Type.PLAIN_TEXT

    # Optional. If not specified, the language is automatically detected.
    # For list of supported languages:
    # https://cloud.google.com/natural-language/docs/languages
    language = "ko"
    document = {"content": text_content, "type_": type_, "language": language}

    # Available values: NONE, UTF8, UTF16, UTF32
    encoding_type = language_v1.EncodingType.UTF8

    response = client.analyze_entities(request={
        'document': document,
        'encoding_type': encoding_type
    })

    # Loop through entitites returned from the API
    for entity in response.entities:
        print(u"Representative name for the entity: {}".format(entity.name))

        # Get entity type, e.g. PERSON, LOCATION, ADDRESS, NUMBER, et al
        print(u"Entity type: {}".format(
            language_v1.Entity.Type(entity.type_).name))

        # Get the salience score associated with the entity in the [0, 1.0] range
        print(u"Salience score: {}".format(entity.salience))

        # Loop over the metadata associated with entity. For many known entities,
        # the metadata is a Wikipedia URL (wikipedia_url) and Knowledge Graph MID (mid).
        # Some entity types may have additional metadata, e.g. ADDRESS entities
        # may have metadata for the address street_name, postal_code, et al.
        for metadata_name, metadata_value in entity.metadata.items():
            print(u"{}: {}".format(metadata_name, metadata_value))

        # Loop over the mentions of this entity in the input document.
        # The API currently supports proper noun mentions.
        for mention in entity.mentions:
            print(u"Mention text: {}".format(mention.text.content))

            # Get the mention type, e.g. PROPER for proper noun
            print(u"Mention type: {}".format(
                language_v1.EntityMention.Type(mention.type_).name))

    # Get the language of the text, which will be the same as
    # the language specified in the request or, if not specified,
    # the automatically-detected language.
    print(u"Language of the text: {}".format(response.language))
def categories(text_content):
    """ Classifying Content in a String

    Args:
      text_content The text content to analyze. Must include at least 20 words.

    Returns:
      Category names joined with " & " (empty string when none returned).
      See the predefined taxonomy:
      https://cloud.google.com/natural-language/docs/categories
    """
    client = language_v1.LanguageServiceClient()

    # text_content = 'That actor on TV makes movies in Hollywood and also stars in a variety of popular new TV shows.'
    # Available types: PLAIN_TEXT, HTML.  Language is optional; if not
    # specified it is auto-detected:
    # https://cloud.google.com/natural-language/docs/languages
    document = {
        "content": text_content,
        "type": enums.Document.Type.PLAIN_TEXT,
        "language": "en",
    }
    response = client.classify_text(document)

    # fixed: the original reset `flag = 0` inside the loop, making the
    # " & " separator branch unreachable, so multiple category names were
    # concatenated with no separator. join() produces the intended output
    # and is identical for zero or one category.
    return " & ".join(category.name for category in response.categories)
def sample_classify_text(text_content):
    """ Classifying Content in a String

    Args:
      text_content The text content to analyze. Must include at least 20 words.
    """
    client = language_v1.LanguageServiceClient()

    # text_content = 'That actor on TV makes movies in Hollywood and also stars in a variety of popular new TV shows.'
    # Available types: PLAIN_TEXT, HTML.  Language is optional; if not
    # specified it is auto-detected:
    # https://cloud.google.com/natural-language/docs/languages
    document = {
        "content": text_content,
        "type_": language_v1.Document.Type.PLAIN_TEXT,
        "language": "yue-Hant-HK",
    }
    response = client.classify_text(request={'document': document})

    # Report each classified category with the classifier's confidence.
    # Taxonomy: https://cloud.google.com/natural-language/docs/categories
    for category in response.categories:
        print(u"Category name: {}".format(category.name))
        print(u"Confidence: {}".format(category.confidence))
def sample_analyze_sentiment(text_content, scoreDataframe):
    """ Analyzing Sentiment in a String

    Args:
      text_content   The text content to analyze
      scoreDataframe DataFrame with 'Sentence' and 'Score' columns

    Returns:
      A new DataFrame with one appended row per sentence.
    """
    #scoreDataframe = pd.DataFrame(columns = ['Sentence','Score'])
    client = language_v1.LanguageServiceClient()
    document = {
        "content": text_content,
        "type": enums.Document.Type.PLAIN_TEXT,
        "language": "en",
    }
    response = client.analyze_sentiment(
        document, encoding_type=enums.EncodingType.UTF8)

    # DataFrame.append returns a new frame, so rebind on each iteration.
    # NOTE(review): DataFrame.append was removed in pandas 2.0 — migrate to
    # pd.concat if the project upgrades pandas.
    for sentence in response.sentences:
        row = {
            'Sentence': sentence.text.content,
            'Score': sentence.sentiment.score,
        }
        scoreDataframe = scoreDataframe.append(row, ignore_index=True)
    return scoreDataframe
def detect_entities(goods_declaration):
    """Extract entity names from a goods declaration string.

    Args:
        goods_declaration: Text to analyze; bytes are decoded as UTF-8.

    Returns:
        List of entity names found by the API (each is also printed
        together with its salience score).
    """
    from google.cloud import language_v1
    from google.cloud.language_v1 import enums
    import six
    # removed: `from google.cloud.language_v1 import types` was never used

    content = goods_declaration
    client = language_v1.LanguageServiceClient()
    if isinstance(content, six.binary_type):
        content = content.decode('utf-8')
    document = {'type': enums.Document.Type.PLAIN_TEXT, 'content': content}
    entities = client.analyze_entities(document).entities

    # entity_type = ('UNKNOWN', 'PERSON', 'LOCATION', 'ORGANIZATION',
    #                'EVENT', 'WORK_OF_ART', 'CONSUMER_GOOD', 'OTHER')
    print('\n')
    print('=' * 20)
    print("Entities")
    print('=' * 20)
    result = []
    for entity in entities:
        result.append(entity.name)
        print(u'{:<16}: {}'.format('name', entity.name))
        print(u'{:<16}: {}'.format('salience', entity.salience))
    return result
def sample_analyze_sentiment(username, content):
    """Analyze sentiment of *content*, print a report, return a summary dict.

    Args:
        username: Author of the content (echoed in the report).
        content: Text to analyze; bytes are decoded as UTF-8.
    """
    client = language_v1.LanguageServiceClient()
    #content = 'Your text to analyze, e.g. Hello, world!'
    if isinstance(content, six.binary_type):
        content = content.decode('utf-8')
    document = {
        'type': enums.Document.Type.PLAIN_TEXT,
        'content': content,
    }
    sentiment = client.analyze_sentiment(document).document_sentiment

    print('User Name: {}'.format(username))
    print('Content: {}'.format(content))
    print('Score: {}'.format(sentiment.score))
    print('Magnitude: {}'.format(sentiment.magnitude) + '\n')

    return {
        'User Name': username,
        'Content': content,
        'Score': sentiment.score,
        'Magnitude': sentiment.magnitude,
    }