Example #1
 def getLanguage(self):
     if self.clients['language'] is None:
         from google.cloud import language
         self.clients['language'] = language.Client(
             self.get_google_cloud_credentials('google-ml-apis'))
     # print 'projectId: %s' % self.projectId
     return self.clients['language']
def analyze(reviews):
    """
	param:reviews : list of reviews for a particular faculty
	"""
    annotations_list = []
    language_client = language.Client()

    for review in reviews:
        document = language_client.document_from_html(review.content)
        annotations = document.annotate_text(include_sentiment=True,
                                             include_syntax=False,
                                             include_entities=False)
        annotations_list.append(annotations)

    positive, negative, neutral = 0, 0, 0
    total = len(annotations_list)

    for annotation in annotations_list:
        score = annotation.sentiment.score
        magnitude = annotation.sentiment.magnitude

        if score >= 0.2:
            positive += 1
        elif score <= -0.2:
            negative += 1
        else:
            neutral += 1

    return (total, positive, negative, neutral)
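A usage sketch for the snippet above: analyze only needs objects exposing a .content attribute holding the review HTML, so a hypothetical namedtuple stands in for the real review model here (the google.cloud language import and credentials from the surrounding module are assumed to be in place).

from collections import namedtuple

Review = namedtuple('Review', ['content'])  # hypothetical stand-in for the real review class

sample_reviews = [
    Review(content='<p>Great lectures, very clear explanations.</p>'),
    Review(content='<p>Grading felt arbitrary and feedback was late.</p>'),
]

total, positive, negative, neutral = analyze(sample_reviews)
print('total={} positive={} negative={} neutral={}'.format(total, positive, negative, neutral))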
Example #3
def language_analysis():
    """
    reads the .txt file from the provided path in command line
    uses google language api to extract and print  "person", "organization" and "location"  name  entity from provided text file
    """
    # must  have language api activated for this to work
    from google.cloud import language  # importing google cloud natural language api
    try:
        file_path = sys.argv[1]
        with open(file_path) as text_file:
            text_content = text_file.read()
        language_client = language.Client()  # client used for the Natural Language API calls
        client_text_content = language_client.document_from_text(text_content)

        recognized_entities = client_text_content.analyze_entities()

        extracted_entities = recognized_entities.entities
        for entity in extracted_entities:
            if entity.entity_type in ["PERSON", "ORGANIZATION", "LOCATION"]:
                print(entity.name)
    except KeyboardInterrupt:
        print("You cancelled the operation; bye.")
    except ImportError:
        print("No module found; make sure the required modules are installed")
    except IOError:
        print("An error occurred while trying to read the file")
    except Exception:
        print("Something unexpected went wrong")
Example #4
def sentiment_analysis():
    parser = reqparse.RequestParser()
    parser.add_argument("query")

    args = parser.parse_args()
    text = args.get("query")

    client = language.Client()
    try:
        document = client.document_from_text(text)
        sent_analysis = document.analyze_sentiment()
        sentiment = sent_analysis.sentiment
    except Exception:
        # Fall back to analyzing a blank string so the response still carries
        # a (neutral) score and magnitude instead of failing outright.
        print("Exception in sentiment_analysis")
        text = " "
        document = client.document_from_text(text)
        sent_analysis = document.analyze_sentiment()
        sentiment = sent_analysis.sentiment
    result = {"score": sentiment.score, "magnitude": sentiment.magnitude}
    result = json.dumps(result)
    return result
def sentiment_text(result, text):
    """
    Detects sentiment in the text.
    Modified code at https://cloud.google.com/natural-language/docs/reference/libraries
    need to install 'Google Cloud SDK' and authenticate by typing the following command:
    gcloud auth application-default login
    """
    language_client = language.Client()

    with open(text, 'r') as f:
        analizee = f.read()
    # result.write(analizee)
    result.write(text + '\n')
    # Instantiates a plain text document.
    document = language_client.document_from_text(analizee)

    # Detects sentiment in the document. You can also analyze HTML with:
    #   document.doc_type == language.Document.HTML
    sentiment = document.analyze_sentiment().sentiment

    # If the score is 0.0, try to disambiguate by re-analyzing only the last 5 sentences.
    if sentiment.score == 0.0:
        analizee = '.'.join(analizee.split('.')[-5:])
        document = language_client.document_from_text(analizee)
        sentiment = document.analyze_sentiment().sentiment

    result.write('Score: {}\n'.format(sentiment.score))
    result.write('Magnitude: {}\n'.format(sentiment.magnitude))
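The docstring above relies on gcloud auth application-default login. As an alternative sketch (the key path below is a placeholder), the client libraries also pick up a service-account key file through the GOOGLE_APPLICATION_CREDENTIALS environment variable:

import os

# Placeholder path; point this at a real service-account key before running.
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = '/path/to/service-account-key.json'

from google.cloud import language
language_client = language.Client()  # picks up the credentials set above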
Example #6
    def create(cls, review, category):

        language_client = language.Client()
        document = language_client.document_from_text(review)
        sentiment = document.analyze_sentiment().sentiment

        # Map the score onto a polarity label; these boundaries cover every value in [-1, 1].
        if sentiment.score < -0.25:
            polarity = "Negative"
        elif sentiment.score <= 0.25:
            polarity = "Neutral"
        else:
            polarity = "Positive"

        score = float(sentiment.score)
        magnitude = float(sentiment.magnitude)

        reviews = AnalyzeSentiments()
        reviews.review = review
        reviews.category = category
        reviews.senti_polarity = polarity
        reviews.senti_score = score
        reviews.senti_magnitude = magnitude
        reviews.put()
        print('Category: {}'.format(category))
        print('Text: {}'.format(review))
        print('Polarity: {}'.format(polarity))
        print('Sentiment: {}, {}'.format(sentiment.score, sentiment.magnitude))

        return reviews
def googleAPI(df):
    text_content = df['article']
    client = language.Client()
    document = client.document_from_text(text_content)
    annotations = document.annotate_text(include_sentiment=True, include_syntax=True,
                                         include_entities=True)

    sentenceList = []
    for sentence in annotations.sentences:
        sentenceList.append(sentence.content)
    df['googleAPIsentences'] = sentenceList

    tokenList = []
    for token in annotations.tokens:
        tokenList.append({token.text_content: token.part_of_speech})
    df['googleAPItokens'] = tokenList

    df['googleAPIsentiment'] = [annotations.sentiment.score, annotations.sentiment.magnitude]

    entityList = []
    for entity in annotations.entities:
        entityList.append({'name': entity.name,
                           'type': entity.entity_type,
                           'wikipedia_url': entity.wikipedia_url,
                           'metadata': entity.metadata,
                           'salience': entity.salience})
    df['googleAPIentities'] = entityList

    return df
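A quick usage sketch (hypothetical input): googleAPI only needs dict-style access to an 'article' field and dict-style assignment for the result columns, so a plain dict is enough for a local check before wiring it to a real DataFrame row.

row = {'article': 'Google was founded by Larry Page and Sergey Brin in California.'}
enriched = googleAPI(row)
print(enriched['googleAPIsentiment'])                      # [score, magnitude]
print([e['name'] for e in enriched['googleAPIentities']])  # extracted entity names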
def sentiment_analysis(text):
    client = language.Client()
    document = client.document_from_text(text)
    sent_analysis = document.analyze_sentiment()
    dir(sent_analysis)
    sentiment = sent_analysis.sentiment

    return sentiment
def analyzeText(text):
    client = language.Client()
    document = client.document_from_text(text)
    sent_analysis = document.analyze_sentiment()
    sentiment = sent_analysis.sentiment
    ent_analysis = document.analyze_entities()
    entities = ent_analysis.entities
    return sentiment, entities
Example #10
def setUpModule():
    _helpers.PROJECT = TESTS_PROJECT
    Config.CLIENT = language.Client()
    # Now create a bucket for GCS stored content.
    storage_client = storage.Client()
    bucket_name = 'new' + unique_resource_id()
    Config.TEST_BUCKET = storage_client.bucket(bucket_name)
    retry_429(Config.TEST_BUCKET.create)()
Example #11
def setUpModule():
    Config.CLIENT = language.Client()
    # Now create a bucket for GCS stored content.
    storage_client = storage.Client()
    bucket_name = 'new' + unique_resource_id()
    Config.TEST_BUCKET = storage_client.bucket(bucket_name)
    # 429 Too Many Requests in case API requests rate-limited.
    retry_429 = RetryErrors(exceptions.TooManyRequests)
    retry_429(Config.TEST_BUCKET.create)()
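A matching teardown sketch (an assumption about the surrounding test module, mirroring the usual system-test pattern) would remove the temporary bucket once the module's tests finish:

def tearDownModule():
    # Delete the scratch bucket and its contents, retrying on 429 responses.
    retry_429 = RetryErrors(exceptions.TooManyRequests)
    retry_429(Config.TEST_BUCKET.delete)(force=True)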
Example #12
def language_analysis(text):
	client = language.Client()
	document = client.document_from_text(text)
	sent_analysis = document.analyze_sentiment()
	# print(dir(sent_analysis))
	sentiment = sent_analysis.sentiment  # direction and magnitude
	ent_analysis = document.analyze_entities()
	entities = ent_analysis.entities  # entity and salience
	return sentiment, entities
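A usage sketch for the snippet above (assumes default credentials are already configured; the sample sentence is illustrative):

sentiment, entities = language_analysis("Google Cloud's Natural Language API makes text analysis straightforward.")
print(sentiment.score, sentiment.magnitude)   # overall direction and strength
for entity in entities:
    print(entity.name, entity.salience)       # entity text and its relative importance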
Example #13
def get_entity_counts_sentiment_score(message_subject, message_content):
    """Extract entities using google NLP API

    Sentiment analysis inspects the given text and identifies the
    prevailing emotional opinion within the text, especially to
    determine a writer's attitude as positive, negative, or neutral.

    Entity analysis inspects the given text for known entities (Proper
    nouns such as public figures, landmarks, and so on. Common nouns
    such as restaurant, stadium, and so on.) and returns information
    about those entities.

    Args
    text: content of text to feed into API

    Returns:
    entity_count_person, entity_count_location, entity_count_organization,
    entity_count_event, entity_count_work_of_art, entity_count_consumer_good,
    sentiment_score
    """

    text = message_subject + message_content

    client = language.Client()
    document = client.document_from_text(text)

    # Detects sentiment in the document.
    annotations = document.annotate_text(include_sentiment=True,
                                         include_syntax=False,
                                         include_entities=True)

    # get the overall sentiment score for the text
    sentiment_score = annotations.sentiment.score

    # get total counts for each entity type found in the text

    entities_found = []
    for e in annotations.entities:
        entities_found.append(e.entity_type)

    entity_count_person = len([i for i in entities_found if i == 'PERSON'])
    entity_count_location = len([i for i in entities_found if i == 'LOCATION'])
    entity_count_organization = len(
        [i for i in entities_found if i == 'ORGANIZATION'])
    entity_count_event = len([i for i in entities_found if i == 'EVENT'])
    entity_count_work_of_art = len(
        [i for i in entities_found if i == 'WORK_OF_ART'])
    entity_count_consumer_good = len(
        [i for i in entities_found if i == 'CONSUMER_GOOD'])

    return entity_count_person, entity_count_location, entity_count_organization, entity_count_event, entity_count_work_of_art, entity_count_consumer_good, sentiment_score
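The six list comprehensions above can be collapsed into a single pass over the entities with collections.Counter; a minimal helper sketch of just the counting step:

from collections import Counter

def count_entity_types(annotations):
    # annotations is the annotate_text result used in the function above.
    counts = Counter(e.entity_type for e in annotations.entities)
    return (counts.get('PERSON', 0), counts.get('LOCATION', 0),
            counts.get('ORGANIZATION', 0), counts.get('EVENT', 0),
            counts.get('WORK_OF_ART', 0), counts.get('CONSUMER_GOOD', 0))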
Example #14
def langage_analysis(text):
    client = language.Client()
    document = client.document_from_text(text)
    sent_analysis = document.analyze_sentiment()
    print(dir(sent_analysis))
    sentiment = sent_analysis.sentiment
    ent_analysis = document.analyze_entities()
    entities = ent_analysis.entities
    return sentiment, entities
Example #15
 def run(self):
     self._initLogging()
     logging.debug("***speech analyzer starting")
     try:
         self._language_client = language.Client()
         self._analyzeSpeech()
         logging.debug("speech analyzer done analyzing")
     except Exception:
         logging.exception("speech analyzer exception")
 def __init__(self, inputText, googleLanguageClient=None, watsonClient=None, googleLanguageModel=None, watsonLanguageModel=None, semanticRoleList=None, entitySizeLimit=5, entities=None, keywords=None):
     self.googleLanguageClient = language.Client()
     self.watsonClient =  self.initialize_watson_client()
     self.inputText = inputText
     self.googleLanguageModel = self.googleLanguageClient.document_from_text(self.inputText, language='es', encoding=language.Encoding.UTF8)      
     self.watsonLanguageModel = self.watsonClient.analyze(text = self.inputText, features=[features.Entities(), features.Keywords(), features.SemanticRoles()])
     self.entitySizeLimit = entitySizeLimit
     self.entities = self.extract_entities()
     self.keywords = self.extract_keywords()
     self.semanticRoleList = semanticRoleList if semanticRoleList is not None else []
Example #17
def analyzeText(text):
    print "Performing sentiment analysis."

    API_SIZE_LIMIT = 1000000
    text = text[:API_SIZE_LIMIT]
    language_client = language.Client()
    document = language_client.document_from_text(text)
    sentiment = document.analyze_sentiment()

    return sentiment
    def analyze_text_google_cloud(self, article):
        client = language.Client()
        document = client.document_from_text(article["title"])
        annotations = document.annotate_text()

        return {
            "entities": annotations.entities,
            "tokens": annotations.tokens,
            "sentiment": annotations.sentiment
        }
Example #19
def getSubject(text):
    language_client = language.Client()
    if isinstance(text, six.binary_type):
        text = text.decode('utf-8')
    document = language_client.document_from_text(text)
    tokens = document.analyze_syntax().tokens
    for token in tokens:
        tag = token.part_of_speech
        if tag == "PRON" or tag == "NOUN":
            return token.text_content
    return "Error 404"
Example #20
def analyze(text):
    language_client = language.Client()

    document = language_client.document_from_html(text)

    annotations = document.annotate_text(include_sentiment=True,
                                         include_syntax=False,
                                         include_entities=False)

    value_dict = set_dict(annotations)

    return value_dict
def extract_syntax(transcriptions, metadata):
    """Extracts tokens in transcriptions using the GCP Natural Language API."""
    client = language.Client()

    document = client.document_from_text('\n'.join(transcriptions),
                                         language='en',
                                         encoding=_get_native_encoding_type())
    # Only extracting tokens here, but the API also provides these other things
    sentences, tokens, sentiment, entities, lang = document.annotate_text(
        include_syntax=True, include_entities=False, include_sentiment=False)

    return tokens, metadata
Example #22
def analyze(wikipedia_news_filename):
    """call the cloud nlp API."""
    language_client = language.Client()

    with open(wikipedia_news_filename, 'r') as news_file:
        document = language_client.document_from_text(news_file.read())

        annotations = document.annotate_text(include_sentiment=True,
                                             include_syntax=False,
                                             include_entities=False)

        analyze_annot(annotations)
Example #23
 def __init__(self,test_url):
     self.test_url = test_url
     self.language_client = language.Client()
     self.paper_dic = {
         'guardian':{
             "api_key":"54c650a2-8ab0-4a35-9417-7f2111fd29e7",
             "url_headers":{}},
         'cnn':{},
         'NYT':{},
         }
     self.set_url_headers()
     self.get_timeline()
Example #24
def getVerb(text):
    language_client = language.Client()
    if isinstance(text, six.binary_type):
        text = text.decode('utf-8')
    document = language_client.document_from_text(text)
    tokens = document.analyze_syntax().tokens
    for token in tokens:
        # print(u'{}: {}'.format(token.part_of_speech, token.text_content))
        tag = token.part_of_speech
        if tag == "VERB":
            return token.text_content
    return "Error 404"
Example #25
def syntax_text(text):
    """Detects syntax in the text."""
    language_client = language.Client()

    # Instantiates a plain text document.
    document = language_client.document_from_text(text)

    # Detects syntax in the document. You can also analyze HTML with:
    #   document.doc_type == language.Document.HTML
    tokens = document.analyze_syntax()

    for token in tokens:
        print('{}: {}'.format(token.part_of_speech, token.text_content))
Example #26
def sentiment_file(gcs_uri):
    """Detects sentiment in the file located in Google Cloud Storage."""
    language_client = language.Client()

    # Instantiates a plain text document.
    document = language_client.document_from_url(gcs_uri)

    # Detects sentiment in the document. You can also analyze HTML with:
    #   document.doc_type == language.Document.HTML
    sentiment = document.analyze_sentiment()

    print('Score: {}'.format(sentiment.score))
    print('Magnitude: {}'.format(sentiment.magnitude))
Example #27
def sentiment_text(text):
    """Detects sentiment in the text."""
    language_client = language.Client()

    # Instantiates a plain text document.
    document = language_client.document_from_text(text)

    # Detects sentiment in the document. You can also analyze HTML with:
    #   document.doc_type == language.Document.HTML
    sentiment = document.analyze_sentiment()

    print('Score: {}'.format(sentiment.score))
    print('Magnitude: {}'.format(sentiment.magnitude))
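For comparison, a sketch of the same sentiment call against the current client surface (assuming google-cloud-language 2.x, which no longer ships language.Client; the function name is just for illustration):

from google.cloud import language_v1

def sentiment_text_v1(text):
    """Detects sentiment in the text using the current client library."""
    client = language_v1.LanguageServiceClient()
    document = language_v1.Document(
        content=text, type_=language_v1.Document.Type.PLAIN_TEXT)
    sentiment = client.analyze_sentiment(
        request={"document": document}).document_sentiment
    print('Score: {}'.format(sentiment.score))
    print('Magnitude: {}'.format(sentiment.magnitude))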
Example #28
def syntax_file(gcs_uri):
    """Detects syntax in the file located in Google Cloud Storage."""
    language_client = language.Client()

    # Instantiates a plain text document.
    document = language_client.document_from_url(gcs_uri)

    # Detects syntax in the document. You can also analyze HTML with:
    #   document.doc_type == language.Document.HTML
    tokens = document.analyze_syntax()

    for token in tokens:
        print('{}: {}'.format(token.part_of_speech, token.text_content))
Example #29
def get_sentiment(comment_tuple):
    language_client = language.Client()
    comments = comment_tuple[1]
    document = language_client.document_from_text(comments)
    annotations = document.annotate_text(include_syntax=False,
                                         include_entities=False,
                                         include_sentiment=True)

    s_scores = []
    for s in annotations.sentences:
        s_scores.append(s.sentiment.score)

    return (comment_tuple[0], s_scores)
def analyze(textToAnalyze):
    """Run a sentiment analysis request on text within a passed filename."""
    language_client = language.Client()
    document = language_client.document_from_html(
        textToAnalyze)  # TODO: check whether this line works without reading from a file

    annotations = document.annotate_text(include_sentiment=True,
                                         include_syntax=False,
                                         include_entities=False)

    # Print the results
    print_result(annotations)
    return annotations