def gcp_analyze_entities(text, debug=0):
    """Analyze entities in a string.

    Args:
        text: The text content to analyze.
    """
    if request.method == "POST":
        text = str(request.form)
    client = language.LanguageServiceClient()
    document = language.Document(content=text,
                                 type_=language.Document.Type.PLAIN_TEXT)
    response = client.analyze_entities(document=document)
    output = []
    # Loop through entities returned from the API
    for entity in response.entities:
        item = {}
        item["name"] = entity.name
        item["type"] = language.Entity.Type(entity.type_).name
        item["Salience"] = entity.salience
        if debug:
            print(u"Representative name for the entity: {}".format(entity.name))
            # Get entity type, e.g. PERSON, LOCATION, ADDRESS, NUMBER, et al.
            print(u"Entity type: {}".format(language.Entity.Type(entity.type_).name))
            # Get the salience score associated with the entity in the [0, 1.0] range
            print(u"Salience score: {}".format(entity.salience))
        # Loop over the metadata associated with the entity. For many known entities,
        # the metadata is a Wikipedia URL (wikipedia_url) and Knowledge Graph MID (mid).
        # Some entity types may have additional metadata, e.g. ADDRESS entities
        # may have metadata for the address street_name, postal_code, et al.
        for metadata_name, metadata_value in entity.metadata.items():
            item[metadata_name] = metadata_value
            if debug:
                print(u"{}: {}".format(metadata_name, metadata_value))
        # Loop over the mentions of this entity in the input document.
        # The API currently supports proper noun mentions.
        if debug:
            for mention in entity.mentions:
                print(u"Mention text: {}".format(mention.text.content))
                # Get the mention type, e.g. PROPER for proper noun
                print(u"Mention type: {}".format(language.EntityMention.Type(mention.type_).name))
        output.append(item)
    # Get the language of the text, which will be the same as
    # the language specified in the request or, if not specified,
    # the automatically-detected language.
    if debug:
        print(u"Language of the text: {}".format(response.language))
    return str(output)

def _analyze_handler(self, text, text_file, file_type, json_file, lang, analyze_method):
    file_type = to_texttype(file_type)
    parameters = {"type_": file_type}
    if text:
        parameters["content"] = text
    elif text_file:
        with open(text_file, "r") as f:  # pylint: disable=unspecified-encoding
            parameters["content"] = f.read()
    else:
        raise AttributeError("Either 'text' or 'text_file' must be given")
    if lang is not None:
        parameters["language"] = lang
    document = language_v1.Document(**parameters)

    if analyze_method == "classify":
        response = self.service.classify_text(document=document)
    elif analyze_method == "sentiment":
        # Available values: NONE, UTF8, UTF16, UTF32
        # encoding_type = enums.EncodingType.UTF8
        response = self.service.analyze_sentiment(document=document, encoding_type="UTF8")
    else:
        raise ValueError("Unknown analyze_method: {}".format(analyze_method))

    self.write_json(json_file, response)
    return response

def classify(text, verbose=True):
    """Classify the input text into categories."""
    language_client = language_v1.LanguageServiceClient()
    document = language_v1.Document(content=text,
                                    type_=language_v1.Document.Type.PLAIN_TEXT)
    response = language_client.classify_text(request={'document': document})
    categories = response.categories

    result = {}
    for category in categories:
        # Turn the categories into a dictionary of the form:
        # {category.name: category.confidence}, so that they can
        # be treated as a sparse vector.
        result[category.name] = category.confidence

    if verbose:
        print(text)
        for category in categories:
            print(u"=" * 20)
            print(u"{:<16}: {}".format("category", category.name))
            print(u"{:<16}: {}".format("confidence", category.confidence))

    return result

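# A minimal sketch (not part of the original source) of how the
# {category.name: category.confidence} dict returned by classify() can be
# treated as a sparse vector, e.g. to compare two texts by cosine similarity
# over their shared categories. The helper name `similarity` and the example
# strings below are assumptions for illustration only.
def similarity(categories_a, categories_b):
    """Cosine similarity between two sparse category-confidence vectors."""
    import math
    dot = sum(conf * categories_b.get(name, 0.0)
              for name, conf in categories_a.items())
    norm_a = math.sqrt(sum(conf ** 2 for conf in categories_a.values()))
    norm_b = math.sqrt(sum(conf ** 2 for conf in categories_b.values()))
    if norm_a == 0.0 or norm_b == 0.0:
        return 0.0
    return dot / (norm_a * norm_b)

# Example usage (hypothetical):
# print(similarity(classify("Google releases a new phone"),
#                  classify("Apple announces a new laptop")))
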
def classify_text(text):
    client = language.LanguageServiceClient()
    document = language.Document(content=text,
                                 type_=language.Document.Type.PLAIN_TEXT)
    response = client.classify_text(document=document)
    return response

def analyze_text_sentiment(text):
    client = language.LanguageServiceClient()
    document = language.Document(content=text,
                                 type_=language.Document.Type.PLAIN_TEXT)
    response = client.analyze_sentiment(document=document)

    sentiment = response.document_sentiment
    results = dict(
        text=text,
        score=f"{sentiment.score:.1%}",
        magnitude=f"{sentiment.magnitude:.1%}",
    )
    for k, v in results.items():
        print(f"{k:10}: {v}")

    # Get sentiment for all sentences in the document
    sentence_sentiment = []
    for sentence in response.sentences:
        item = {}
        item["text"] = sentence.text.content
        item["sentiment score"] = sentence.sentiment.score
        item["sentiment magnitude"] = sentence.sentiment.magnitude
        sentence_sentiment.append(item)

    return sentence_sentiment

def get_sentiment_predictions(texts):
    # Relies on module-level `client` (LanguageServiceClient) and `keywords`
    # (a list of keyword dicts) being defined elsewhere.
    predictions = []
    for text in texts:
        # text = preprocess_text(text)
        document = language_v1.Document(content=text,
                                        type_=language_v1.Document.Type.PLAIN_TEXT)
        sentiments = client.analyze_sentiment(request={'document': document})
        score = 0
        for sentence in sentiments.sentences:
            for keyword in keywords:
                if keyword['phrase'].replace(' ', '') in sentence.text.content.lower().replace(' ', ''):
                    if sentence.sentiment.score <= -0.4:
                        if keyword['negative'] == 'left':
                            score -= keyword['priority']
                        else:
                            score += keyword['priority']
                    elif sentence.sentiment.score >= 0.4:
                        if keyword['positive'] == 'left':
                            score -= keyword['priority']
                        else:
                            score += keyword['priority']
                    break
        if score < 0:
            predictions.append('left')
        elif score > 0:
            predictions.append('right')
        else:
            predictions.append('non-political')
    return predictions

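# A hypothetical example (an assumption, not taken from the original source)
# of the `keywords` structure that get_sentiment_predictions() reads: each
# entry carries the phrase to match, a weighting priority, and which side a
# strongly negative or strongly positive mention of that phrase counts toward.
keywords = [
    {'phrase': 'tax cuts', 'priority': 2, 'negative': 'left', 'positive': 'right'},
    {'phrase': 'climate change', 'priority': 1, 'negative': 'right', 'positive': 'left'},
]
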
def process_ideas_gcp(self, session_id):
    '''Pass each idea to GCP for entity and syntax analysis.'''
    log.debug('process idea GCP: starting')
    messages = self.db.find(coll='raw_messages', filtro={"session_id": session_id})
    for message in messages:
        document = language_v1.Document(
            content=message["content"],
            type_=language_v1.Document.Type.PLAIN_TEXT)
        try:
            entities = json.loads(
                proto.Message.to_json(
                    self.gcp.analyze_entities(request={'document': document})))
        except Exception as e:
            log.error(e)
            continue  # skip this message if entity analysis failed
        syntax = json.loads(
            proto.Message.to_json(
                self.gcp.analyze_syntax(request={'document': document})))
        log.debug(f'response {type(entities)}')
        self.db.insert_db(coll='gcp_response',
                          doc={
                              "session_id": session_id,
                              "message": message["content"],
                              "entities": entities["entities"],
                              "language_text": entities["language"],
                              "syntax": syntax
                          })

def analyze_text_sentiment(text):
    """
    This is modified from the Google NLP API documentation found here:
    https://cloud.google.com/natural-language/docs/analyzing-sentiment
    It makes a call to the Google NLP API to retrieve sentiment analysis.
    """
    client = language.LanguageServiceClient()
    document = language.Document(content=text,
                                 type_=language.Document.Type.PLAIN_TEXT)
    response = client.analyze_sentiment(document=document)

    # Format the results as a dictionary
    sentiment = response.document_sentiment
    results = dict(
        text=text,
        score=f"{sentiment.score:.1%}",
        magnitude=f"{sentiment.magnitude:.1%}",
    )

    # Print the results for observation
    for k, v in results.items():
        print(f"{k:10}: {v}")

    # Get sentiment for all sentences in the document
    sentence_sentiment = []
    for sentence in response.sentences:
        item = {}
        item["text"] = sentence.text.content
        item["sentiment score"] = sentence.sentiment.score
        item["sentiment magnitude"] = sentence.sentiment.magnitude
        sentence_sentiment.append(item)

    return sentence_sentiment

def run_quickstart():
    # [START language_quickstart]
    # Imports the Google Cloud client library
    # [START language_python_migration_imports]
    from google.cloud import language_v1
    # [END language_python_migration_imports]

    # Instantiates a client
    # [START language_python_migration_client]
    client = language_v1.LanguageServiceClient()
    # [END language_python_migration_client]

    # The text to analyze
    text = "Hello, world!"
    document = language_v1.Document(
        content=text, type_=language_v1.Document.Type.PLAIN_TEXT
    )

    # Detects the sentiment of the text
    sentiment = client.analyze_sentiment(
        request={"document": document}
    ).document_sentiment

    print("Text: {}".format(text))
    print("Sentiment: {}, {}".format(sentiment.score, sentiment.magnitude))

def analyze_text(text):
    # os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = 'sentiment_analysis.json'
    # Instantiates a client
    client = language_v1.LanguageServiceClient()
    document = language_v1.Document(content=text,
                                    type_=language_v1.Document.Type.HTML)

    # Detects the sentiment of the text
    sentiment = client.analyze_sentiment(request={
        'document': document
    }).document_sentiment
    score = round(sentiment.score, 4)
    magnitude = round(sentiment.magnitude, 4)

    if score >= 0.6 and magnitude >= 0.8:
        label = 'Positive'
    elif score <= -0.6 and magnitude >= 0.8:
        label = 'Negative'
    elif magnitude < 0.8:
        label = 'Neutral'
    else:
        label = 'Mixed'

    short_text = text if len(text) < 500 else text[:500] + '...'
    return (label, score, magnitude, short_text)

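# A minimal usage sketch (assumed, not part of the original source): the input
# is analyzed as HTML, and the returned tuple unpacks into a label, the rounded
# score and magnitude, and a truncated copy of the text.
# label, score, magnitude, short_text = analyze_text("<p>The service was fast and friendly.</p>")
# print(label, score, magnitude)
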
def sentiment_analysis():
    client, config, ret = language_v1.LanguageServiceClient(), get_config(), []

    # get all keywords
    keywords = []
    for k, v in config["currencies"].items():
        keywords += v["keywords"]

    # analyze tweets
    d = fetch_tweets(config["twitter"]["accounts"], keywords)
    for user_id, tweets in d.items():
        for tweet in tweets:
            tweet_id = tweet["id"]
            text = u"{}".format(tweet["text"])
            doc = language_v1.Document(
                content=text, type_=language_v1.Document.Type.PLAIN_TEXT
            )
            sent = client.analyze_sentiment(
                request={"document": doc}
            ).document_sentiment
            """
            print("Text: {}".format(text))
            print("Sentiment: {:.2%}, {:.2%}".format(
                sent.score, sent.magnitude))
            """
            ret.append((tweet_id, text, sent.score, sent.magnitude))
    return ret

def __get_sentiment(self, client, significant_data, today, results, containers):
    # Helper function called by get_news. significant_data is the string whose
    # sentiment score we are interested in. today is a boolean: True if the
    # article was published today, else False. results is the dictionary of
    # values returned by get_news. containers is the dictionary of data
    # structures used to store intermediate calculations.
    try:
        document = language_v1.Document(
            content=significant_data,
            type_=language_v1.Document.Type.PLAIN_TEXT)
        sentiment = client.analyze_sentiment(request={
            'document': document
        }).document_sentiment
        if today:
            results['articles_today'] += 1
            containers['today_total_magnitude'] += sentiment.magnitude
            containers['today_magnitude_scores'].append(sentiment.magnitude)
            containers['today_sentiment_scores'].append(sentiment.score)
        else:
            containers['total_magnitude'] += sentiment.magnitude
            containers['magnitude_scores'].append(sentiment.magnitude)
            containers['sentiment_scores'].append(sentiment.score)
    except Exception as e:
        print(e)
    return results, containers

def analyze_text(text):
    client = language_v1.LanguageServiceClient()
    document = language_v1.Document(content=text,
                                    type_=language_v1.Document.Type.PLAIN_TEXT)
    return client.analyze_sentiment(request={
        'document': document
    }).document_sentiment

def store_sentiment_of_article(formatted_text, filename):
    # remove html tags
    text = remove_html_tags(formatted_text)

    if filename.exists():
        logger.info(f'File {filename} already exists')
    else:
        try:
            # set environment variable for authentication
            os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = \
                r"C:\Users\di872\Desktop\renewrs_recsys\renewrs-e50f271b94e7.json"

            # run sentiment analysis request on text
            client = language_v1.LanguageServiceClient()
            document = language_v1.Document(
                content=text, type_=language_v1.Document.Type.PLAIN_TEXT)
            annotations = client.analyze_sentiment(request={'document': document})
            json_data = json.loads(annotations.__class__.to_json(annotations))

            # create folder to save sentiment in
            filename.parent.mkdir(parents=True, exist_ok=True)
            # save sentiment
            with open(str(filename), "w") as file:
                json.dump(json_data, file, indent=4)
        except Exception as e:
            logger.error(f'Failure for {filename}: {e}')

def get_sentiment(self, text):
    document = language_v1.Document(
        content=text, type_=language_v1.Document.Type.PLAIN_TEXT)
    sentiment = self.client.analyze_sentiment(request={
        'document': document
    }).document_sentiment
    return sentiment

def get_twitter_text(keyword):
    # Relies on module-level globals: `api` (tweepy API), `client`
    # (LanguageServiceClient), `results`, `allscore`, and `count`.
    global avg
    for tweet in tweepy.Cursor(api.search,
                               q=keyword,
                               lang='en',
                               include_entities=False,
                               tweet_mode='extended').items(100):
        if "RT" not in tweet.full_text:
            document = language_v1.Document(
                content=cleaning_text(tweet.full_text),
                type_=language_v1.Document.Type.PLAIN_TEXT)
            sentiment = client.analyze_sentiment(request={
                'document': document
            }).document_sentiment
            # result.append([cleaning_text(tweet.full_text), sentiment.score, sentiment.magnitude])
            result = {
                'text': cleaning_text(tweet.full_text),
                'sentimentscore': sentiment.score
            }
            if sentiment.score != 0:
                get_avg_sentimentscore(sentiment.score)
            results.append(result)
    avg = allscore / count
    return results

def defineScore(text):
    # Instantiates a client
    client = language_v1.LanguageServiceClient()
    document = language_v1.Document(content=text,
                                    type_=language_v1.Document.Type.PLAIN_TEXT)

    # Detects the sentiment of the text
    sentiment = client.analyze_sentiment(request={'document': document}).document_sentiment
    return sentiment.score

def analyze_text_sentiment_rest(text):
    client = language.LanguageServiceClient()
    document = language.Document(content=text,
                                 type_=language.Document.Type.PLAIN_TEXT)
    response = client.analyze_sentiment(document=document)

    sentiment = response.document_sentiment
    results = dict(
        text=text,
        score=f"{sentiment.score:.1%}",
        magnitude=f"{sentiment.magnitude:.1%}",
    )
    for k, v in results.items():
        print(f"{k:10}: {v}")

    sentence_sentiment = []
    for index, sentence in enumerate(response.sentences, start=1):
        item = {}
        item["text"] = sentence.text.content
        item["score"] = sentence.sentiment.score
        item["magnitude"] = sentence.sentiment.magnitude
        item["index"] = index
        sentence_sentiment.append(item)

    return sentence_sentiment

def getSentimentFromWeb(description):
    # Instantiates a client
    # data = request.get_json()
    # headers = {'User-Agent': 'Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/534.3 (KHTML, like Gecko) Chrome/6.0.472.63 Safari/534.3'}
    #
    # URL = request.get_json()['url']
    # URL = url
    # page = requests.get(URL, headers=headers)
    # soup1 = soup(page.content, 'html.parser')
    client = language_v1.LanguageServiceClient(credentials=creds2)
    # data = soup1.find('body').text
    # wordlen = len(data)
    # half = int(wordlen / 2)
    # if half > 1000:
    #     half = wordlen - 100
    # data = data[half:]
    # print("Text: " + (data["data"]))

    # The text to analyze
    text = u"".join(description)
    document = language_v1.Document(content=text,
                                    type_=language_v1.Document.Type.PLAIN_TEXT)

    # Detects the sentiment of the text
    sentiment = client.analyze_sentiment(request={'document': document}).document_sentiment

    # print("Text: {}".format(text))
    # print("Sentiment: {}, {}".format(sentiment.score, sentiment.magnitude))
    # return "Sentiment: {}, {}".format(sentiment.score, sentiment.magnitude)
    return sentiment.score

def classify(text, verbose=True):
    """Classify the input text into categories and extract salient entities."""
    language_client = language_v1.LanguageServiceClient()
    document = language_v1.Document(
        content=text, type_=language_v1.Document.Type.PLAIN_TEXT
    )
    response = language_client.classify_text(request={'document': document})
    categories = response.categories

    categories_list = []
    for category in categories:
        # Collect just the category names. (The commented-out line would
        # instead build a {category.name: category.confidence} dictionary
        # that can be treated as a sparse vector.)
        # result[category.name] = category.confidence
        categories_list.append(category.name)

    # Available values: NONE, UTF8, UTF16, UTF32
    encoding_type = language_v1.EncodingType.UTF8
    response1 = language_client.analyze_entities(
        request={'document': document, 'encoding_type': encoding_type})

    # Loop through entities returned from the API; stop at the first entity
    # whose salience falls below the threshold.
    entities_list = []
    for entity in response1.entities:
        if entity.salience > 0.05:
            entities_list.append(entity.name)
        else:
            break

    return categories_list, entities_list

def analyzeScore(text=None):
    document = language_v1.Document(content=text,
                                    type_=language_v1.Document.Type.PLAIN_TEXT)
    response = client.analyze_sentiment(request={
        'document': document
    }).document_sentiment
    return response.score

def analyze(filename):
    """Run a sentiment analysis request on text within a passed filename."""
    client = language_v1.LanguageServiceClient()

    with open(filename, "r") as review_file:
        # Instantiates a plain text document.
        content = review_file.read()

    document = language_v1.Document(content=content,
                                    type_=language_v1.Document.Type.PLAIN_TEXT)
    annotations = client.analyze_sentiment(request={'document': document})

    # Print the results
    # print_result(annotations)

    # Write sentence scores to csv
    csv_filename = write_result_to_csv(annotations, filename)

    # text = document.content
    # read csv data into dataframe
    sentence_scores = pd.read_csv(csv_filename)
    print(sentence_scores)

    # describe the sentence sentiments
    sentence_sentiments = sentence_scores.SentenceSentiment.describe()
    print(sentence_sentiments)

    # plot sentence sentiment vs. sentence index
    sentence_scores.plot(kind='line', y='SentenceSentiment', x='SentenceIndex')
    plt.show()

def tweet_sentiment(name):
    client = language_v1.LanguageServiceClient()
    tweet_list, df = twitter_user(name)
    score_list = ['\0' for i in range(len(tweet_list))]
    for i in range(len(tweet_list)):
        sentence_str = str(tweet_list[i])
        document = language_v1.Document(
            content=sentence_str,
            type_=language_v1.Document.Type.PLAIN_TEXT)
        sentiment = client.analyze_sentiment(request={
            'document': document
        }).document_sentiment
        # Blend the API score (rescaled from [-1, 1] to [0, 50]) with the
        # custom sentiment_predict() output (presumably in [0, 1], rescaled
        # to [0, 50]) for a combined 0-100 score.
        score_list[i] = format(
            (sentiment.score + 1) * 25 + sentiment_predict(sentence_str) * 50,
            ".1f")
        # print("Text: {}".format(sentence_str.strip("['']")))
        # print("Sentiment: (score) {} (magnitude) {}".format(sentiment.score, sentiment.magnitude))
    # return sentiment.score, sentence_str.strip("['']")
    # return score_list, sentiment.magnitude
    return score_list

def get_sentiment_score_using_google(text_list):
    client = language_v1.LanguageServiceClient()
    texts = []
    text_sentiment = []
    text_score = []
    for text in tqdm(text_list):
        try:
            document = language_v1.Document(
                content=text,
                type_=language_v1.Document.Type.PLAIN_TEXT,
                language='en')
            sentiment = client.analyze_sentiment(request={
                'document': document
            }).document_sentiment
            texts.append(text)
            text_score.append(sentiment.score)
            # Simple thresholding: a positive score counts as 'positive',
            # anything else as 'negative'.
            sentiment_ = 'positive' if sentiment.score > 0 else 'negative'
            text_sentiment.append(sentiment_)
        except Exception:
            pass
    return texts, text_sentiment, text_score

def get_sentiment_score(tweet):
    client = language_v1.LanguageServiceClient()
    document = language_v1.Document(content=tweet,
                                    type_=language_v1.Document.Type.PLAIN_TEXT)
    sentiment = client.analyze_sentiment(request={'document': document}).document_sentiment
    sentiment_score = sentiment.score
    sentiment_magnitude = sentiment.magnitude
    print(sentiment_score, sentiment_magnitude)

def getSentiment(self, text):
    doc = language.Document(content=text,
                            language='en',
                            type_=language.Document.Type.PLAIN_TEXT)
    textSentiment = self.client.analyze_sentiment(document=doc, encoding_type='UTF32')
    SentimentScore = textSentiment.document_sentiment.score
    return SentimentScore

def get_sentiment(tweet):
    client = language_v1.LanguageServiceClient()
    document = language_v1.Document(content=tweet,
                                    type_=language_v1.Document.Type.PLAIN_TEXT)
    result = client.analyze_sentiment(request={
        'document': document
    }).document_sentiment.score
    return result

def gcp_classify_text(text):
    client = language.LanguageServiceClient()
    document = language.Document(content=text,
                                 type_=language.Document.Type.PLAIN_TEXT)
    response = client.classify_text(document=document)
    # response = client.classify(document=document)
    # Return only the name of the first category returned by the API.
    for category in response.categories:
        return category.name

def analyze(content, client):
    """Run a sentiment analysis request on the passed text content."""
    document = language_v1.Document(content=content,
                                    language='zh',
                                    type_=language_v1.Document.Type.PLAIN_TEXT)
    annotations = client.analyze_sentiment(request={'document': document})

    # Return the results
    return annotations

def getSentiment(text):
    document = language_v1.Document(
        content=text, type_=language_v1.Document.Type.PLAIN_TEXT)

    # Detects the sentiment of the text
    sentiment = client.analyze_sentiment(request={
        'document': document
    }).document_sentiment
    return sentiment.score