Example no. 1
        entity_num = 0.0
        word_count = len(text.split())
        char_count = len(text)
        avg_word_len = char_count / word_count
        follower_count = row[4]
        anger = 0.0
        disgust = 0.0
        fear = 0.0
        joy = 0.0
        sadness = 0.0
        is_quote = 1 if row[1] == "TRUE" else 0

        # sentiment analysis
        sentiment_response = natural_language_understanding.analyze(
            text=text,
            features=Features(sentiment=SentimentOptions())).get_result()
        sentiment = sentiment_response['sentiment']['document']['score']

        # entity analysis
        entities_response = natural_language_understanding.analyze(
            text=text,
            features=Features(entities=EntitiesOptions(
                sentiment=True, emotion=True))).get_result()
        sentiment_sum = 0
        for entity in entities_response['entities']:
            sentiment_sum += entity['sentiment']['score'] * entity['relevance']
            entity_num += 1
            anger += entity['emotion']['anger'] * entity['relevance']
            disgust += entity['emotion']['disgust'] * entity['relevance']
            fear += entity['emotion']['fear'] * entity['relevance']
            joy += entity['emotion']['joy'] * entity['relevance']
Example no. 2
import json
from ibm_watson import NaturalLanguageUnderstandingV1
from ibm_cloud_sdk_core.authenticators import IAMAuthenticator
from ibm_watson.natural_language_understanding_v1 import Features, KeywordsOptions

authenticator = IAMAuthenticator('6Hc3iMJs5gGstVanqPXRjaPXmPBO7rhyy4KpSp7USNl4')
natural_language_understanding = NaturalLanguageUnderstandingV1(
    version='2019-07-12',
    authenticator=authenticator
)

natural_language_understanding.set_service_url('https://api.eu-gb.natural-language-understanding.watson.cloud.ibm.com/instances/b7b78abd-f1ec-4551-89bd-1ba280052302')

response = natural_language_understanding.analyze(
    url='https://twitter.com/kahanikaar_/status/1209402709612232704',
    features=Features(keywords=KeywordsOptions(sentiment=True,emotion=True,limit=2))).get_result()

print(json.dumps(response, indent=2))
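The API key above is hard-coded in the source; a minimal sketch of loading it from an environment variable instead (the variable name NLU_APIKEY is illustrative, not part of the original snippet):

import os
from ibm_cloud_sdk_core.authenticators import IAMAuthenticator

# Read the key from the environment rather than embedding it in the code.
authenticator = IAMAuthenticator(os.environ['NLU_APIKEY'])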
    def emo_fn(self):
        # cn/cp/cng: neutral/positive/negative sentence counts;
        # e1..e5: counts of the dominant emotion (sadness/joy/fear/disgust/anger)
        cn = cp = cng = e1 = e2 = e3 = e4 = e5 = 0
        with open("C:\\Users\\Kripa\\Desktop\\convo.csv") as csv_file:
            csv_reader = csv.reader(csv_file, delimiter=',')
            for row in csv_reader:
                j = 0
                row = listtostring(row)
                response = natural_language_understanding.analyze(
                    text=row,
                    language='en',
                    features=Features(
                        sentiment=SentimentOptions(),
                        emotion=EmotionOptions(),
                    )).get_result()

                sen1 = response.get('sentiment').get('document').get('score')
                sen2 = response.get('sentiment').get('document').get('label')
                if sen1 == 0:
                    cn += 1

                elif sen1 > 0:
                    cp += 1

                else:
                    cng += 1
                op = response.get('emotion').get('document').get('emotion')

                # Create a list of tuples sorted by index 1 i.e. value field
                listofTuples = sorted(op.items(),
                                      reverse=True,
                                      key=lambda x: x[1])
                ll = listofTuples[0]
                d = dict(listofTuples)
                for k, v in d.items():
                    d1 = k
                    d2 = v
                    j += 1
                    if j > 0:
                        break
                if d1 == 'sadness':
                    e1 += 1
                elif d1 == 'joy':
                    e2 += 1
                elif d1 == 'fear':
                    e3 += 1
                elif d1 == 'disgust':
                    e4 += 1
                else:
                    e5 += 1

        s = s1 = 0
        s = cn + cng + cp
        pp = (cp * 100) / s
        ngp = (cng * 100) / s
        np = (cn * 100) / s
        s1 = e1 + e2 + e3 + e4 + e5
        e1p = (e1 * 100) / s1
        e2p = (e2 * 100) / s1
        e3p = (e3 * 100) / s1
        e4p = (e4 * 100) / s1
        e5p = (e5 * 100) / s1

        screen = Builder.load_string(screen_helper1)

        neutral = "Neutral: " + str(round(np, 2))
        pos = "Positive: " + str(round(pp, 2))
        neg = "Negative: " + str(round(ngp, 2))

        screen.ids.sentimentlist.add_widget(OneLineListItem(text=neutral, ))
        screen.ids.sentimentlist.add_widget(OneLineListItem(text=pos, ))
        screen.ids.sentimentlist.add_widget(OneLineListItem(text=neg, ))

        sm.add_widget(screen)

        screen = Builder.load_string(screen_helper2)

        sad = "Sad: " + str(round(e1p, 2))
        joy = "Joy: " + str(round(e2p, 2))
        fear = "Fear: " + str(round(e3p, 2))
        disgust = "Disgust: " + str(round(e4p, 2))
        angry = "Angry: " + str(round(e5p, 2))

        screen.ids.emotionallist.add_widget(OneLineListItem(text=sad, ))
        screen.ids.emotionallist.add_widget(OneLineListItem(text=joy, ))
        screen.ids.emotionallist.add_widget(OneLineListItem(text=fear, ))
        screen.ids.emotionallist.add_widget(OneLineListItem(text=disgust, ))
        screen.ids.emotionallist.add_widget(OneLineListItem(text=angry, ))

        sm.add_widget(screen)
def listtostring(s):  # helper used above: join a csv row's fields into one string
    str1 = " "
    return (str1.join(s))


# cn/cp/cng: neutral/positive/negative sentence counts;
# e1..e5: counts of the dominant emotion (sadness/joy/fear/disgust/anger)
cn = cp = cng = e1 = e2 = e3 = e4 = e5 = 0
f_name = input("enter the csv file name (without the .csv extension): ")
with open("C:\\Users\\HP\\Desktop\\" + f_name + ".csv") as csv_file:
    csv_reader = csv.reader(csv_file, delimiter=',')
    for row in csv_reader:
        j = 0
        row = listtostring(row)
        response = natural_language_understanding.analyze(
            text=row,
            language='en',
            features=Features(
                sentiment=SentimentOptions(),
                emotion=EmotionOptions(),
            )).get_result()

        sen1 = response.get('sentiment').get('document').get('score')
        sen2 = response.get('sentiment').get('document').get('label')
        if sen1 == 0:
            cn += 1

        elif sen1 > 0:
            cp += 1

        else:
            cng += 1
        op = response.get('emotion').get('document').get('emotion')
        #print(type(op))
        # Create a list of tuples sorted by index 1 i.e. value field
Example no. 5
        features=Features(sentiment=SentimentOptions(
            targets=['stocks']))).get_result()
    return response


#print(json.dumps(response, indent=2))
#
#response = natural_language_understanding.analyze(
#    html="<html><head><title>Emporphis</title></head><body><h1>IoT a Revolution for Retail Stores</h1><p>Internet of Things (IoT) is driving revolution and new opportunities by bringing every consumer, object, and activity into the digital realm. At the same time, leading businesses are making many changes within their organisations and enterprises by digitizing their employees, services, process, and product.</p></body></html>",
#    features=Features(emotion=EmotionOptions(targets=['iot','organisations']))).get_result()
#
#print(json.dumps(response, indent=2))

response = natural_language_understanding.analyze(
    url='www.impetus.com',
    features=Features(
        entities=EntitiesOptions(sentiment=True, limit=1))).get_result()


def nlpResponse(sitename):
    try:
        response = natural_language_understanding.analyze(
            url=sitename,
            features=Features(entities=EntitiesOptions(sentiment=True,
                                                       limit=1))).get_result()
        return response['entities']
    except Exception:
        return "Something went wrong. Please try with https:// or http://"


#import datetime as dt
#print(dt.datetime.now())
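A brief usage sketch for nlpResponse above (the URL is illustrative): on success it returns the list of entities, otherwise an error string.

result = nlpResponse("https://www.ibm.com")
if isinstance(result, str):
    print(result)  # error message
else:
    for entity in result:
        print(entity["text"], entity["sentiment"]["score"])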
Example no. 6
def main(args):

    # Parse incoming request headers
    _c_type, p_dict = parse_header(args['__ow_headers']['content-type'])

    # Decode body (base64)
    decoded_string = b64decode(args['__ow_body'])

    # Set Headers for multipart_data parsing
    p_dict['boundary'] = bytes(p_dict['boundary'], "utf-8")
    p_dict['CONTENT-LENGTH'] = len(decoded_string)

    # Parse incoming request data
    multipart_data = parse_multipart(BytesIO(decoded_string), p_dict)

    # Build flac file from stream of bytes
    fo = open("audio_sample.flac", 'wb')
    fo.write(multipart_data.get('audio')[0])
    fo.close()

    car = multipart_data.get('car')[0]
    text = multipart_data.get('text')[0]
    """## NLU service

    You will need 3 things: the key and URL of your `Natural Language Understanding` service and the model_id of your trained Knowledge Studio model.
    """

    nlu_apikey = "R5Kq3Z4sJbPaepfWCC1d3iYch2kIEHJkF1sqnHZTC-C3"

    nlu_service_url = "https://api.us-south.natural-language-understanding.watson.cloud.ibm.com/instances/d26c8f6f-666f-44eb-a631-cb8b161f0c48"

    nlu_entity_model = "a52546bf-6061-4fd0-a3ec-f2e6aa6d19b9"
    """Now we instantiate the services with their credentials."""

    # Create an authenticator
    nlu_authenticator = IAMAuthenticator(apikey=nlu_apikey)

    # Create the service, passing in this authenticator
    nlu_service = NaturalLanguageUnderstandingV1(
        version='2018-03-16', authenticator=nlu_authenticator)

    # Set the access URL of our service
    nlu_service.set_service_url(nlu_service_url)

    ## STT service

    stt_apikey = "-pCzIHgC12ljTpVXELSfx71BAP2yUmAlacQaD1YXdZqM"

    stt_service_url = "https://api.us-south.speech-to-text.watson.cloud.ibm.com/instances/2dda5ef8-4933-4096-8fb6-ad817e0e105c"
    """And now we instantiate the service with its credentials."""

    stt_authenticator = IAMAuthenticator(apikey=stt_apikey)

    stt_service = SpeechToTextV1(authenticator=stt_authenticator)

    stt_service.set_service_url(stt_service_url)

    stt_model = 'pt-BR_BroadbandModel'

    if multipart_data.get('audio'):

        # Read audio file and call Watson STT API:
        with open(
                os.path.join(os.path.dirname(__file__), './.',
                             'audio_sample.flac'), 'rb') as audio_file:
            # Transcribe the audio.flac with Watson STT
            # Recognize method API reference:
            # https://cloud.ibm.com/apidocs/speech-to-text?code=python#recognize
            stt_result = stt_service.recognize(
                audio=audio_file,
                content_type='audio/flac',
                model=stt_model).get_result()

        results_stt = json.loads(
            json.dumps(stt_result, indent=2, ensure_ascii=False))

        text = results_stt['results'][0]['alternatives'][0]['transcript']

        # Return a dictionary with the transcribed text
        #return {
        #    "transcript": stt_result['results'][0]['alternatives'][0]['transcript']
        #}

    # The analyze method takes care of everything
    nlu_response = nlu_service.analyze(
        text=text,
        features=Features(
            entities=EntitiesOptions(model=nlu_entity_model, sentiment=True)),
        language='pt').get_result()

    results_nlu = json.loads((json.dumps(nlu_response,
                                         indent=2,
                                         ensure_ascii=False)))

    return results_nlu
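For reference, a hedged sketch of how a client might call this action once it is deployed as a web action; the endpoint URL is a placeholder and the form-field names ('audio', 'car', 'text') come from the parsing code above.

import requests

# Placeholder endpoint for the deployed IBM Cloud Functions web action.
url = "https://<region>.functions.appdomain.cloud/api/v1/web/<namespace>/default/<action>"
files = {
    "audio": ("audio_sample.flac", open("audio_sample.flac", "rb"), "audio/flac"),
    "car": (None, "fiat"),
    "text": (None, ""),
}
response = requests.post(url, files=files)
print(response.json())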
Example no. 7
def fetch_keywords_from_sentence(sentence):
    nlu = NaturalLanguageUnderstandingV1(version=config.WATSON_CONFIG["version"],
                                         url=config.WATSON_CONFIG["url"],
                                         iam_apikey=config.WATSON_CONFIG["iam_apikey"])
    response = nlu.analyze(text=sentence, features=Features(keywords=KeywordsOptions()))
    return extract_text_from_keywords_list(response.result["keywords"])
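The helper extract_text_from_keywords_list is not shown in this snippet; a plausible minimal version, plus an illustrative call, might look like this (both are assumptions, not the original code).

# Hypothetical helper: pull only the 'text' field out of the NLU keywords list.
def extract_text_from_keywords_list(keywords):
    return [keyword["text"] for keyword in keywords]

print(fetch_keywords_from_sentence("IBM Watson extracts keywords from this sentence."))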
Example no. 8
def readContent(relPath):
    global count
    if (relPath.find('.DS') > -1):
        return
    files = os.listdir(relPath)
    articles = []
    for f in files:
        if (f.find('.tar') > -1 or f.find('.DS') > -1):
            continue
        pwf = open(relPath + '/' + f, 'r')
        lines = pwf.readlines()
        if (len(lines) < 7):  #skip if the article is missing
            continue
        lines.append('\n')
        article = ''

        for line in lines[7:-1]:
            article = article + str(line.replace('\n', ' '))

        articles.append(article)

    tfidf_vectorizer_vectors = tfidf_vectorizer.fit_transform(
        articles)  #what if it's empty?

    fileNum = 0
    for f in files:
        if (f.find('.tar') > -1 or f.find('.DS') > -1):
            continue
        pwf = open(relPath + '/' + f, 'r')
        lines = pwf.readlines()

        col2 = lines[2][3:lines[2].find('\n')].replace(',', '')  # date
        col2 = col2[4:-4]
        col2 = datetime.strptime(col2, '%b %d %Y %I:%M%p')
        # print(col2)
        # print(lines[2])
        col3 = lines[0][3:lines[0].find('\n')].replace(',', '')  # title

        # news_summary = lines[7]
        # news_body = lines[9]

        # complete_text = lines[7]

        if (len(lines) < 7):
            continue
        lines.append('\n')
        limit = lines[7:].index('\n')

        data = ''
        for i in range(7, 7 + limit):
            data = data + str(lines[i].replace('\n', ' '))

        # for i in range(0,len(lines)):
        #     print(i)
        #     print(lines[i])

        # print(data)

        #1. Get the companies
        response = natural_language_understanding.analyze(
            text=data,
            features=Features(
                entities=EntitiesOptions(sentiment=True),
                # keywords=KeywordsOptions(emotion=True, sentiment=True,
                #)
            )).get_result()

        # print(data)
        entities = response['entities']

        # print(entities)
        # print(len(entities))
        # print(type(entities))
        # print(col2)
        companies = []
        documents = []
        for entity in entities:
            if entity["type"] == "Company":
                if entity.get("text") != "Bloomberg" and entity.get(
                        "text") != "Reuters":
                    if entity.get("disambiguation"):
                        if entity.get("disambiguation").get(
                                "name") != "Reuters" and entity.get(
                                    "disambiguation").get(
                                        "name") != "Bloomberg":
                            entity["text"] = entity["text"].translate(
                                str.maketrans('', '', string.punctuation))
                            companies.append(entity)

        print(companies)
        for company in companies:
            stock_symbol = requests.get("http://d.yimg.com/aq/autoc?query=" +
                                        company["text"] +
                                        "&region=US&lang=en-US").json()
            sentiment = company['sentiment']
            #  print(sentiment)
            #  print(type(sentiment))
            if stock_symbol['ResultSet']['Result']:
                stock_symbol = stock_symbol['ResultSet']['Result'][0]['symbol']
                # print(stock_symbol)
                start_date = end_date = col2.strftime('%m/%d/%Y')
                start_datetime = col2

                #first, there needs to be a check if today is either a weekend or a federal holiday.
                while start_datetime.weekday(
                ) == saturday or start_datetime.weekday(
                ) == sunday or datetime.combine(
                        start_datetime, datetime.min.time()) in holidays:
                    start_datetime -= timedelta(days=1)

                yesterday_datetime = start_datetime - timedelta(days=1)

                while yesterday_datetime.weekday(
                ) == saturday or yesterday_datetime.weekday(
                ) == sunday or datetime.combine(
                        yesterday_datetime, datetime.min.time()) in holidays:
                    yesterday_datetime -= timedelta(days=1)

                tomorrow_datetime = start_datetime + timedelta(days=1)

                while tomorrow_datetime.weekday(
                ) == saturday or tomorrow_datetime.weekday(
                ) == sunday or datetime.combine(
                        tomorrow_datetime, datetime.min.time()) in holidays:
                    tomorrow_datetime += timedelta(days=1)

                ten_days_datetime = start_datetime - timedelta(days=10)

                yesterday_stock_data = None
                ten_days_stock_data = None

                try:
                    yesterday_stock_data = get_stock_data(
                        stock_symbol,
                        start=yesterday_datetime.strftime('%m/%d/%Y'),
                        end=tomorrow_datetime.strftime('%m/%d/%Y'))
                    ten_days_stock_data = get_stock_data(
                        stock_symbol,
                        start=ten_days_datetime.strftime('%m/%d/%Y'),
                        end=start_datetime.strftime('%m/%d/%Y'),
                        interval='d')

                except KeyError:
                    pass

                if yesterday_stock_data is not None and ten_days_stock_data is not None:
                    yest_stock_dict = yesterday_stock_data.to_dict()
                    yest_stock_list = yesterday_stock_data.values.tolist()
                    ten_days_list = ten_days_stock_data.values.tolist()

                    if company["text"] == "Hulu":
                        print(company["text"])
                        print(yest_stock_dict)
                        print(yest_stock_list)
                        print(ten_days_list)
                        pass

                    open_ = yest_stock_dict["Open"].get(
                        pd.Timestamp(
                            datetime.combine(start_datetime,
                                             datetime.min.time())))
                    close = yest_stock_dict["Close"].get(
                        pd.Timestamp(
                            datetime.combine(start_datetime,
                                             datetime.min.time())))
                    volume = yest_stock_dict["Volume"].get(
                        pd.Timestamp(
                            datetime.combine(start_datetime,
                                             datetime.min.time())))

                    if open_ is None:
                        open_ = ten_days_list[len(ten_days_list) - 2][Open]
                        close = ten_days_list[len(ten_days_list) - 2][Close]
                        volume = ten_days_list[len(ten_days_list) - 2][Volume]

                    open_yesterday = yest_stock_dict["Open"].get(
                        pd.Timestamp(
                            datetime.combine(yesterday_datetime,
                                             datetime.min.time())))
                    close_yesterday = yest_stock_dict["Close"].get(
                        pd.Timestamp(
                            datetime.combine(yesterday_datetime,
                                             datetime.min.time())))
                    volume_yesterday = yest_stock_dict["Volume"].get(
                        pd.Timestamp(
                            datetime.combine(yesterday_datetime,
                                             datetime.min.time())))

                    if open_yesterday is None:
                        open_yesterday = ten_days_list[len(ten_days_list) - 3][Open]
                        close_yesterday = ten_days_list[len(ten_days_list) - 3][Close]
                        volume_yesterday = ten_days_list[len(ten_days_list) - 3][Volume]

                    open_tomorrow = yest_stock_dict["Open"].get(
                        pd.Timestamp(
                            datetime.combine(tomorrow_datetime,
                                             datetime.min.time())))
                    close_tomorrow = yest_stock_dict["Close"].get(
                        pd.Timestamp(
                            datetime.combine(tomorrow_datetime,
                                             datetime.min.time())))
                    volume_tomorrow = yest_stock_dict["Volume"].get(
                        pd.Timestamp(
                            datetime.combine(tomorrow_datetime,
                                             datetime.min.time())))

                    if open_tomorrow is None:
                        open_tomorrow = ten_days_list[len(yest_stock_list) -
                                                      1][Open]
                        close_tomorrow = ten_days_list[len(yest_stock_list) -
                                                       1][Close]
                        volume_tomorrow = ten_days_list[len(yest_stock_list) -
                                                        1][Volume]

                    open_ten_days = ten_days_list[0][Open]
                    close_ten_days = ten_days_list[0][Close]
                    volume_ten_days = ten_days_list[0][Volume]

                    # print(open_)
                    # print(close)
                    # print(volume)

                    # print(open_yesterday)
                    # print(close_yesterday)
                    # print(volume_yesterday)

                    # print(open_tomorrow)
                    # print(close_tomorrow)
                    # print(volume_tomorrow)

                    # print(open_ten_days)
                    # print(close_ten_days)
                    # print(volume_ten_days)

                    if close is None or close_ten_days is None or close_yesterday is None or open_ is None or open_ten_days is None or open_yesterday is None:
                        print("Missing price data for " + stock_symbol + "; skipping")
                        continue

                    close_ten_day_pct = get_percentage_change(
                        close_ten_days, close)
                    open_ten_day_pct = get_percentage_change(
                        open_ten_days, open_)

                    close_one_day_pct = get_percentage_change(
                        close_yesterday, close)
                    open_one_day_pct = get_percentage_change(
                        open_yesterday, open_)
                    vector_tfidfvectorizer = tfidf_vectorizer_vectors[fileNum]
                    df = pd.DataFrame(
                        vector_tfidfvectorizer.T.todense(),
                        index=tfidf_vectorizer.get_feature_names(),
                        columns=["tfidf"])
                    df = df.sort_values(by=["tfidf"], ascending=False).head(10)
                    # print(df.to_dict())

                    post = {"stock_symbol":stock_symbol,"date":start_datetime, "sentiment":sentiment, "tf_idf":df.to_dict(), \
                        "open":open_, "close":close, "volume":volume, "close_ten_day_pct":close_ten_day_pct, "open_ten_day_pct":open_ten_day_pct, \
                            "close_one_day_pct":close_one_day_pct, "open_one_day_pct":open_one_day_pct, "open_tomorrow":open_tomorrow, "close_tomorrow":close_tomorrow, \
                                "volume_tomorrow":volume_tomorrow, "summary":data, "link":lines[3][3:], "title":lines[0][3:]}

                    collection.insert_one(post)
                    print(post)

                    #print(df)

        # if count == 10:
        #     break

        # count += 1
        fileNum = fileNum + 1
Example no. 9
def get_entity(text):
    response = natural_language_understanding.analyze(
        text=text, features=Features(entities=EntitiesOptions(sentiment=True, limit=1))
    ).get_result()

    return response
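A small usage sketch for get_entity (the input text is made up); with sentiment=True each returned entity carries a sentiment score.

result = get_entity("IBM Watson was developed by IBM in Armonk, New York.")
for entity in result.get("entities", []):
    print(entity["text"], entity["type"], entity["sentiment"]["score"])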
Example no. 10
def get_actions_related_to_entities():
    """ 
    FUNCTION: Perform semantic roles (POS tagging or identification of action, subject, object) in titles of articles to categorize action 
    entities with products/general entities they describe (and vice versa - match product/general entities with action entities that describe them)
        Represented by 2 dictionaries:
        - ACTS TO ENTS: which match actions to product/general entities 
                [action: [all event entities in sentence that action describes]]
        - ENTS TO ACTS: match product/general entities to actions that describe them
    
    STRUCTURE:
    PART 1:First calls IBM Watson semantic roles function to identify action, subject, object in each sentence, 
        but if IBM Watson is unable to perform function, go to PART 2

    PART 2: perform custom POS tagging engine 
        - first analyzes if there is (action + noun) bigrams sentence that does not contain ('or' + action) 
          because that indicates the verb only describes noun directly following it 

            e.g., DOES NOT CONTAIN 'OR': 'Create user accounts and set permissions' -- 'create' only describes 'user accounts' and 'set' only describes 'permissions'
            e.g., CONTAINS 'OR': 'Create, suspend, or cancel customer subscriptions' -- 'create', 'suspend', 'cancel' ALL describe 'customer subscriptions'
        
        - keeps track of the indices of the (action + noun) bigrams 
        - split the POS tag list by the indices and perform analysis within each sublist 
        - concatenates all the action and event entity words by "~" character
        - splits actions by "~" and matches event entities to each individual action 
            ***at the moment, only doing this for actions because usually actions are just one word, so each token in the actions list 
               concatenated by "~" can be assumed to be a different action whereas event entities are often more than one word, currently not 
               enough info on how to split event entities concatenated by "~"
               (a standalone sketch of the action + noun bigram check appears after this function)
    """

    actions_to_entities_dict = {}
    entities_to_actions_dict = {}

    #print sentences
    for i in read_output:
        print(i[2])

    for i in read_output:
        url_link = i[0]
        title = i[2]
        content = i[3]
        content_sentences = i[3].split('.')
        title_tokens = title.split(' ')

        response = natural_language_understanding.analyze(
            text=title,
            features=Features(
                semantic_roles=SemanticRolesOptions())).get_result()
        json_dict = json.loads(
            json.dumps(response))  #turns json string output into dictionary

        #extract json values
        values_to_actions = []  # [event entities]
        values_to_entities = []  #[action entities]
        subject = ''
        action = ''
        object_string = ''
        entity = ''
        sentence = title

        #-----PART 1: PERFORM IBM WATSON SEMANTIC ROLES----

        if (json_dict['semantic_roles'] != []):
            print('ibm watson')
            sentence = json_dict['semantic_roles'][0]['sentence']
            if (json_dict['semantic_roles'][0]['subject']['text'] is not None):
                subject = json_dict['semantic_roles'][0]['subject']['text']
                print(subject)
            if (json_dict['semantic_roles'][0]['action']['text'] is not None):
                action = json_dict['semantic_roles'][0]['action']['text']
                print(action)
            if (json_dict['semantic_roles'][0]['object']['text'] is not None):
                object_string = json_dict['semantic_roles'][0]['object'][
                    'text']
                print(object_string)

            entity = entity + '~ ' + subject
            values_to_actions.append(entity)
            values_to_entities.append(action)
            #map each action/verb to the relevant entities; categorize entities to each action
            check_if_action_key_exists(action, actions_to_entities_dict,
                                       values_to_actions)
            check_if_entity_key_exists(entity, entities_to_actions_dict,
                                       values_to_entities)

        # ----- PART 2: PERFORM CUSTOM POS TAGGING IF WATSON FAILS ------
        else:
            tokens = nltk.word_tokenize(title)
            POS_tags = nltk.pos_tag(tokens)
            print(POS_tags)
            action_noun_template_count = 0
            bigram_dict = {}
            bigrams = [
            ]  #keep track of bigrams to compare verb + noun templates
            contains_or_conjunction = False
            index_list = [
            ]  #list of indices by which POS tags will be split by
            index_list.append(0)

            #populate bigrams based on criteria described above
            for i, val in enumerate(POS_tags):
                if (i == len(tokens) - 1):
                    bigrams.append([val[1], ''])
                else:
                    bigrams.append([val[1], POS_tags[i + 1][1]])
                    if (val[0].lower() == 'or' and
                        (POS_tags[i + 1][1]
                         in ('VB', 'VBD', 'VBG', 'VBN', 'VBP', 'VBZ') or
                         POS_tags[i + 1][1] in action_entity_output_string)):
                        contains_or_conjunction = True

            if contains_or_conjunction == False:
                for i, val in enumerate(
                        bigrams):  #i is index of POS tag in sentence
                    first = val[0]
                    second = val[1]
                    if ((first in ('VB', 'VBD', 'VBG', 'VBN', 'VBP')
                         or first in action_entity_output_string)
                            and second in ('NN', 'NNP', 'NNPS', 'NNS', 'CD')):
                        if (i != 0):
                            index_list.append(i)

            index_list.append(len(tokens) - 1)
            #go through POS_tags based on split indices to populate the dictionaries
            count = 0
            for i, index in enumerate(index_list):
                if (i < len(index_list) - 1):
                    if (len(index_list) > 2):
                        action = ''
                        entity = ''
                        values_to_actions = []
                        values_to_entities = []
                    for (word,
                         tag) in POS_tags[index_list[i]:index_list[i + 1]]:
                        if (word.lower() in action_entity_output_string):
                            if (
                                    word == tokens[0]
                            ):  #if first word of sentence is in action library, highly likely it is an action (nltk sometimes tags verbs as nouns)
                                action = action + '~ ' + word
                            else:
                                if (
                                        tag not in ('NNP', 'NNPS', 'NNS', 'NN',
                                                    'CD')
                                ):  #need further clarification because POS tag isn't always most accurate
                                    action = action + '~ ' + word
                        elif (tag in ('NNP', 'NNPS', 'NNS', 'NN', 'CD') and
                              word.lower() not in action_entity_output_string):
                            entity = entity + '~ ' + word  #could be more than one product/general entity or entity is a phrase not a single word
                    action = action.strip('~')
                    print(action)
                    entity = entity.strip('~')
                    if (
                            '~' in action
                    ):  #handles scenarios in which multiple verbs are identified in sentence and we want to map each of them to the same noun entities identified
                        #e.g., "Create, suspend, or cancel customer subscriptions - Partner Center"-- we want ['Create' : [customer subscriptions, Partner Center]], ['suspend': [customer subscriptions, Partner Center]], ['cancel': [customer subscriptions, Partner Center]]
                        action_list = action.split('~')
                        for action_item in action_list:
                            values_to_entities.append(action_item)
                        values_to_actions.append(entity)
                        if (entity != ''):
                            check_if_entity_key_exists(
                                entity, entities_to_actions_dict,
                                values_to_entities)
                        for action_item in action_list:
                            if (action_item != ''):  #see helper function
                                check_if_action_key_exists(
                                    action_item, actions_to_entities_dict,
                                    values_to_actions)

                    else:
                        values_to_entities.append(action)
                        values_to_actions.append(entity)
                        if (action != ''):
                            check_if_action_key_exists(
                                action, actions_to_entities_dict,
                                values_to_actions)
                        if (entity != ''):

                            check_if_entity_key_exists(
                                entity, entities_to_actions_dict,
                                values_to_entities)
                        #print(values_to_actions)
    print('---- OUTPUT ----')
    print('ACTIONS MAPPED TO GEN/PROD ENTITIES')
    print(actions_to_entities_dict)
    Tools.write_csv_data('./watson_output_actionsMap.csv',
                         actions_to_entities_dict)
    print('GEN/PROD ENTITIES MAPPED TO ACTIONS')
    print(entities_to_actions_dict)
    Tools.write_csv_data('./watson_output_entitiesMap.csv',
                         entities_to_actions_dict)
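As referenced in the docstring above, a standalone sketch of the action + noun bigram check from PART 2 (the title string is illustrative and the ACTIONS set stands in for action_entity_output_string, which is defined elsewhere in the original project).

import nltk

# One-time setup, if the taggers are not already installed:
# nltk.download('punkt'); nltk.download('averaged_perceptron_tagger')

ACTIONS = {'create', 'set', 'suspend', 'cancel'}  # stand-in for action_entity_output_string
VERB_TAGS = ('VB', 'VBD', 'VBG', 'VBN', 'VBP', 'VBZ')
NOUN_TAGS = ('NN', 'NNP', 'NNPS', 'NNS', 'CD')

title = "Create user accounts and set permissions"
pos_tags = nltk.pos_tag(nltk.word_tokenize(title))

# Indices where a verb (or known action word) is immediately followed by a noun;
# these are the points at which the sentence gets split into (action, entity) groups.
split_indices = [
    i for i, (word, tag) in enumerate(pos_tags[:-1])
    if (tag in VERB_TAGS or word.lower() in ACTIONS)
    and pos_tags[i + 1][1] in NOUN_TAGS
]
print(pos_tags)
print(split_indices)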
  text_file = open("sample_split" + str(i) + ".txt", "w", encoding="utf-8")
  n = text_file.write(split_plain_text)
  text_file.close()
  i += 1

# KEYWORD EXTRACTION
authenticator = IAMAuthenticator(apikey)
natural_language_understanding = NaturalLanguageUnderstandingV1(
    version='2020-08-01',
    authenticator=authenticator
)
natural_language_understanding.set_service_url(service_url)

for j in range(i+1):
  text = open("sample_split" + str(j) + ".txt", "r", encoding="utf-8").read()
  response = natural_language_understanding.analyze(text = text, features=Features(keywords=KeywordsOptions(limit=4500))).get_result()

  ibm_output = json.dumps(response, indent=2)
  json_file = open("ibm_output_" + str(j) + ".json", "w", encoding="utf-8")
  n = json_file.write(ibm_output)
  json_file.close()

# PARSE JSON FILES FOR KEYWORDS
keywords = []

for k in range(i+1):
  json_file = open("ibm_output_" + str(k) + ".json", "r", encoding="utf-8").read()
  json_output = json.loads(json_file)
  for keyword in json_output['keywords']:
    text = keyword['text'].replace('\u2019', "'")
    relevance = keyword['relevance']
Example no. 12
def main(params):

    text = params['text'].replace("\n", "")
    # Send the text to NLU
    response_nlu = naturalLanguageUnderstanding.analyze(
        text=text,
        features=Features(concepts=ConceptsOptions(limit=5),
                          entities=EntitiesOptions(emotion=True,
                                                   sentiment=True),
                          keywords=KeywordsOptions(emotion=True,
                                                   sentiment=True),
                          sentiment=SentimentOptions(document=True),
                          emotion=EmotionOptions(document=True))).get_result()

    # The first criterion is that the article mentions a "Company" entity, so we loop over the entities recognized by NLU
    company = ""
    location = ""
    i = 0
    while (i < len(response_nlu["entities"])
           and (company == "" or location == "")):
        if (response_nlu["entities"][i]["type"] == "Company"
                and company == ""):
            company = response_nlu["entities"][i]["text"]
            sentiment = response_nlu["entities"][i]["sentiment"]["score"]
            emotion_json_pointer = response_nlu["entities"][i]["emotion"]
            sadness = emotion_json_pointer["sadness"]
            joy = emotion_json_pointer["joy"]
            disgust = emotion_json_pointer["disgust"]
            anger = emotion_json_pointer["anger"]
            score_pondere_company = -0.5 * (anger + disgust + sadness -
                                            joy) + sentiment
        if (response_nlu["entities"][i]["type"] == "Location"
                and location == ""):
            location = response_nlu["entities"][i]["text"]
        i += 1

    # Collect and store the article's sentiment and emotion values
    sentiment = response_nlu["sentiment"]["document"]["score"]
    emotion_json_pointer = response_nlu["emotion"]["document"]["emotion"]
    sadness = emotion_json_pointer["sadness"]
    joy = emotion_json_pointer["joy"]
    disgust = emotion_json_pointer["disgust"]
    anger = emotion_json_pointer["anger"]
    score_pondere = -0.5 * (anger + disgust + sadness - joy) + sentiment

    if (company != "" and score_pondere < 0.5):
        flag_article_retained = 0
        # We initialize the list of keywords, the dictionary which will store the data on the article after processing and the counter to count how many entities were detected (to further place the article in list_already_treated_MANA_articles by its relevance)
        keywords_list = []
        list_keywords_confirmed = []
        list_alerting_entities_confirmed = []
        list_sentences_confirmed = []
        list_keywords_deceitful = []
        #counter_confirmed_detected_alerting_entities=0

        for l in range(len(response_nlu["keywords"])):
            emotion_json_pointer = response_nlu["keywords"][l]["emotion"]
            sadness = emotion_json_pointer["sadness"]
            joy = emotion_json_pointer["joy"]
            disgust = emotion_json_pointer["disgust"]
            anger = emotion_json_pointer["anger"]
            sentiment = response_nlu["keywords"][l]["sentiment"]["score"]
            score_pondere_keyword = -0.5 * (anger + disgust + sadness -
                                            joy) + sentiment
            keywords_list.append(
                [response_nlu["keywords"][l]["text"], score_pondere_keyword])

        for keyword_data in keywords_list:
            keyword = keyword_data[0]
            response_bot = assistant.message(
                #workspace_id = 'a2dd5d22-63b4-4915-aac8-1c4f6fd358f6',
                workspace_id='6d7f9feb-3d05-4c0e-82b5-6c509638648c',
                input={
                    'text': keyword
                }).get_result()
            # If the bot has recognized either an alerting entity or the intent Oui_MANA or Non_MANA, then the answer differs from the anything-else node's text: 'No redhibitory word detected'
            if response_bot["output"]["text"] != [
                    'No redhibitory word detected'
            ]:
                if response_bot["output"]["text"] != [
                        'OuiMANA'
                ] and response_bot["output"]["text"] != ['NonMANA']:
                    position_alerting_entity = response_bot['entities'][0][
                        'location']
                    alerting_entity = response_bot['input'][
                        'text'][position_alerting_entity[0]:
                                position_alerting_entity[1]]
                    list_alerting_entities_confirmed.append(alerting_entity)
                    #counter_confirmed_detected_alerting_entities+=1
                for sentence_keyword in text.split('.'):
                    if keyword in sentence_keyword:
                        # If an alerting entity was discovered, meaning it is not one of the intents by elimination
                        #if response_bot["output"]["text"]!=['OuiMANA'] and response_bot["output"]["text"]!=['NonMANA']:
                        # We need the following little trick to catch the exact synonym of entity value that was detected in the input keyword
                        # Having collected the sentences in which this entity appears, we now send them back to the bot, whose nodes were placed with a jump to the nodes of the intents to check whether the sentences trigger the Oui_MANA or Non_MANA intent
                        confirmation_bot = assistant.message(
                            #workspace_id = 'a2dd5d22-63b4-4915-aac8-1c4f6fd358f6',
                            workspace_id='6d7f9feb-3d05-4c0e-82b5-6c509638648c',
                            input={
                                'text': sentence_keyword
                            },
                            context=response_bot["context"]).get_result()
                        if confirmation_bot["output"]["text"] == ['OuiMANA']:
                            # The flag value indicates that the 1st layer classified the article, i.e. an alerting entity was detected and its sentences were relevant for MANA
                            try:
                                assistant.create_example(
                                    #workspace_id = 'a2dd5d22-63b4-4915-aac8-1c4f6fd358f6',
                                    workspace_id=
                                    '6d7f9feb-3d05-4c0e-82b5-6c509638648c',
                                    intent='OuiMANA',
                                    text=sentence_keyword,
                                ).get_result()
                            except KeyboardInterrupt:
                                return 0
                            except:
                                pass

                            flag_article_retained = 1
                            list_keywords_confirmed.append(keyword_data)
                            list_sentences_confirmed.append(sentence_keyword)

                        elif confirmation_bot["output"]["text"] == ['NonMANA']:
                            #if response_bot["output"]["text"]!=['OuiMANA'] and response_bot["output"]["text"]!=['NonMANA']:
                            try:
                                assistant.create_example(
                                    #workspace_id = 'a2dd5d22-63b4-4915-aac8-1c4f6fd358f6',
                                    workspace_id=
                                    '6d7f9feb-3d05-4c0e-82b5-6c509638648c',
                                    intent='NonMANA',
                                    text=sentence_keyword,
                                ).get_result()
                            except KeyboardInterrupt:
                                return 0
                            except:
                                pass
                            list_keywords_deceitful.append(keyword_data)
                        # It is possible that no alerting entity was detected but that the keyword triggered the intent of the bot
                        # Hence it might be a less evident, more subtle MANA phrase with no "redhibitory words", hence the flag value 2 for 2nd layer
                        #(if the flag was not already set to 1 by the confirmation of a MANA alert detection)
                        #else:
                        #confirmation_MANA_sentence(keyword,sentence_keyword,assistant,response_bot,counter_confirmed_detected_alerting_entities,flag_article_retained)

        if flag_article_retained == 0:
            classifiers = natural_language_classifier.list_classifiers(
            ).get_result()
            response_nlc = natural_language_classifier.classify(
                classifiers["classifiers"][-1]["classifier_id"],
                text[0:2045]).get_result()
            # The flag value of 3 stands for 3rd layer
            if response_nlc['top_class'] == "Oui_MANA":
                flag_article_retained = 3

    # If the article was retained by one layer, i.e. that the flag value is not 0, we store all its information
        article_highlighted = text
        if flag_article_retained != 0:
            score_keywords_confirmed = 0

            list_sentences_confirmed = list(set(list_sentences_confirmed))
            count_sentences = len(list_sentences_confirmed)
            for sentence in list_sentences_confirmed:
                article_highlighted = article_highlighted.replace(
                    sentence, '<mark style="background-color: yellow">' +
                    sentence + '</mark>')

            for k in list_keywords_confirmed:
                score_keywords_confirmed += k[1]

            list_all_keywords = list_keywords_confirmed + list_keywords_deceitful
            list_all_keywords = list(set(map(tuple, list_all_keywords)))
            for keyword_data in list_all_keywords:
                article_highlighted = article_highlighted.replace(
                    keyword_data[0],
                    '<mark style="background-color: orange">' +
                    keyword_data[0] + "(" + str(round(keyword_data[1], 2)) +
                    ")" + '</mark>')

            list_alerting_entities_confirmed = list(
                set(list_alerting_entities_confirmed))
            for keyword in list_alerting_entities_confirmed:
                article_highlighted = article_highlighted.replace(
                    keyword, '<mark style="background-color: red">' + keyword +
                    '</mark>')

            article_highlighted = article_highlighted.replace('$', 'dollars')

            return {
                'flag': flag_article_retained,
                'location': location,
                'company': company,
                'score_company': score_pondere_company,
                'score': score_pondere,
                'count': count_sentences,
                'text': article_highlighted,
                'score_keywords_confirmed': score_keywords_confirmed
            }

        else:
            list_keywords_deceitful = list(
                set(map(tuple, list_keywords_deceitful)))
            for keyword_data in list_keywords_deceitful:
                article_highlighted = article_highlighted.replace(
                    keyword_data[0],
                    '<mark style="background-color: orange">' +
                    keyword_data[0] + "(" + str(round(keyword_data[1], 2)) +
                    ")" + '</mark>')

            return {
                'flag': flag_article_retained,
                'location': location,
                'company': company,
                'score_company': score_pondere_company,
                'score': score_pondere,
                'count': 0,
                'text': article_highlighted,
                'score_keywords_confirmed': 0
            }

    else:
        return {
            'flag': '-1',
            'location': '0',
            'company': '0',
            'score_company': '0',
            'score': '0',
            'count': '0',
            'text': text,
            'score_keywords_confirmed': '0'
        }
Example no. 13
def clustering(df1):
    print("entered clustering function")
    vectorizer = TfidfVectorizer(stop_words='english')
    X = vectorizer.fit_transform(df1['INTENTEXAMPLES'])
    df1['CLUSTER'] = ''

    # Intent clustering with initial 10 clusters
    true_k = 10
    model = KMeans(n_clusters=true_k, init='k-means++', max_iter=100, n_init=1)
    model.fit(X)

    # (Full parameter set of the fitted model, for reference:
    #  KMeans(algorithm='auto', copy_x=True, init='k-means++', max_iter=100,
    #         n_clusters=10, n_init=1, n_jobs=1, precompute_distances='auto',
    #         random_state=None, tol=0.0001, verbose=0))

    order_centroids = model.cluster_centers_.argsort()[:, ::-1]
    terms = vectorizer.get_feature_names()

    for i in range(true_k):
        print("Cluster %d:" % i)
        for ind in order_centroids[i, :10]:
            print('%s' % terms[ind])

    for i in range(0, len(df1)):
        input_text = df1['INTENTEXAMPLES'][i]
        X = vectorizer.transform([input_text])
        predicted = model.predict(X)
        df1['CLUSTER'][i] = predicted[0]

    df2 = df1.groupby('CLUSTER')['INTENTEXAMPLES'].agg(' '.join).reset_index()

    # Fetching intents of the clustered questions
    authenticator = IAMAuthenticator(
        'FMUFQJHtKvAqukIATHrBQqnVy8GP_5lvN5Iq0JzokZgn')
    natural_language_understanding = NaturalLanguageUnderstandingV1(
        version='2019-07-12', authenticator=authenticator)

    natural_language_understanding.set_service_url(
        'https://api.us-south.natural-language-understanding.watson.cloud.ibm.com/instances/f9f3fbd1-3de7-44e6-a53a-dc93992d6627'
    )
    df2['INTENT'] = ''

    i = 0
    for i in range(0, len(df2)):
        try:
            response = natural_language_understanding.analyze(
                text=df2['INTENTEXAMPLES'][i],
                features=Features(keywords=KeywordsOptions(
                    sentiment=False, emotion=False, limit=3))).get_result()
            if response['keywords'] != []:
                if len(response['keywords']) == 1:
                    df2['INTENT'][i] = response['keywords'][0]['text']
                elif len(response['keywords']) == 2:
                    a = response['keywords'][0]['text']
                    b = response['keywords'][1]['text']
                    df2['INTENT'][i] = (a + '_' + b)

                else:
                    a = response['keywords'][0]['text']
                    b = response['keywords'][1]['text']
                    c = response['keywords'][2]['text']
                    df2['INTENT'][i] = (a + '_' + b + '_' + c)
            else:
                df2['INTENT'][i] = ''
        except:
            df2['INTENT'][i] = ''
        i = i + 1

    df2['INTENT'] = df2['INTENT'].replace(' ', '', regex=True)

    event_dictionary = pd.Series(df2.INTENT.values,
                                 index=df2.CLUSTER).to_dict()

    df1['INTENT_KMEANS'] = df1['CLUSTER'].apply(set_value,
                                                args=(event_dictionary, ))

    #df1.to_excel('C:/McD/covid/NLP_bot/NLP.xlsx', encoding ='utf8')
    #print("excel printed")

    return df1
def main():
    # connecting to MongoDB
    myclient = MongoClient("mongodb://localhost:27017/")

    # Navigate to proper database and collection
    mydb_input = input('Enter the name of your MongoDB database: ')
    mydb = myclient[mydb_input]
    mycol_input = input('Enter the name of your MongoDB collection: ')
    mycol = mydb[mycol_input]

    # Initialize list of dictionaries
    keywords_list = []
    entities_list = []

    for document in mycol.find():
        try:
            if ((detect(str(
                    document['tweet'])) == 'en')):  # if the tweet is english
                # clean the tweet
                cleaned_tweet = clean_tweet(str(document['tweet']))
                # generate response from NLU
                response = natural_language_understanding.analyze(
                    text=cleaned_tweet,
                    features=Features(
                        entities=EntitiesOptions(sentiment=True, limit=1),
                        keywords=KeywordsOptions(sentiment=True,
                                                 limit=1))).get_result()
                # KEYWORDS: generate/update dict entry for keyword
                try:
                    keyword_found = False
                    for item in keywords_list:
                        if item['text'] == response['keywords'][0]['text']:
                            item['count'] += 1
                            keyword_found = True
                            break
                    if not keyword_found:
                        # create keyword_dict entry of output from NLU
                        keyword_entry = {
                            'text':
                            response['keywords'][0]['text'],
                            'count':
                            response['keywords'][0]['count'],
                            'sentiment':
                            response['keywords'][0]['sentiment']['score']
                        }
                        keywords_list.append(keyword_entry)
                except Exception as e:
                    print('Error adding keyword: ' + str(e))

                # ENTITIES: generate/update dict entry for keyword
                try:
                    entity_found = False
                    for item in entities_list:
                        if item['text'] == response['entities'][0]['text']:
                            item['count'] += 1
                            entity_found = True
                            break
                    if not entity_found:
                        # create entity_dict entry
                        entity_entry = {
                            'text':
                            response['entities'][0]['text'],
                            'count':
                            response['entities'][0]['count'],
                            'sentiment':
                            response['entities'][0]['sentiment']['score']
                        }
                        entities_list.append(entity_entry)
                except Exception as e:
                    print('Error adding entity: ' + str(e))
        except Exception as e:
            print('Error processing tweet: ' + str(e))

    # top 20 only
    keywords_sorted = sorted(keywords_list,
                             key=lambda i: i['count'],
                             reverse=True)
    entities_sorted = sorted(entities_list,
                             key=lambda i: i['count'],
                             reverse=True)

    if len(keywords_sorted) > 20:
        keywords_sorted = keywords_sorted[:20]
    if len(entities_sorted) > 20:
        entities_sorted = entities_sorted[:20]

    # Creating a dataframe using pandas
    df_keywords = pandas.DataFrame(keywords_sorted)
    df_keywords_columns = ['text', 'count', 'sentiment']
    df_keywords = df_keywords.reindex(columns=df_keywords_columns)

    df_entities = pandas.DataFrame(entities_sorted)
    df_entities_columns = ['text', 'count', 'sentiment']
    df_entities = df_entities.reindex(columns=df_entities_columns)

    # Writing the data to a .csv file
    filename_keywords = input(
        'Enter a .csv file name to save the keywords data to: ')
    df_keywords.to_csv(filename_keywords, encoding='utf-8', index=False)
    print('File successfully saved!')
    print()
    filename_entities = input(
        'Enter a .csv file name to save the entities data to: ')
    df_entities.to_csv(filename_entities, encoding='utf-8', index=False)
    print('File successfully saved!')
Example no. 15
# Receiving the input from the latest user to further analyze with IBM Watson's natural language processing service
mostRecentUser = mentalHealthData.pop()

# Authenticating access to IBM Watson
authenticator = IAMAuthenticator(
    '1jplXA1AMzd3rlm3cRE-Nws1AwATXKe1cS7PUfY_dS8M')
natural_language_understanding = NaturalLanguageUnderstandingV1(
    version='2019-07-12', authenticator=authenticator)
natural_language_understanding.set_service_url(
    'https://api.us-south.natural-language-understanding.watson.cloud.ibm.com/instances/8ac78265-c41b-45ea-8bc3-6942e96e3d63'
)

# Using Watson to process description of current mental state given by current user of virtual assistant
watsonAnalysis = natural_language_understanding.analyze(
    text=mostRecentUser['Description'],
    features=Features(emotion=EmotionOptions())).get_result()
emotionLevels = watsonAnalysis['emotion']['document']['emotion']
Row = mostRecentUser['Row']
pp.pprint(Row)

# Printing output for volunteer who will be picking up the call
print('I am detecting these levels of emotions from the current caller')
pp.pprint(emotionLevels)

MHdataSheet.update_cell(Row, 9, emotionLevels['anger'])
MHdataSheet.update_cell(Row, 10, emotionLevels['disgust'])
MHdataSheet.update_cell(Row, 11, emotionLevels['fear'])
MHdataSheet.update_cell(Row, 12, emotionLevels['joy'])
MHdataSheet.update_cell(Row, 13, emotionLevels['sadness'])

# Using IBM Watson's emotion level outputs and the individual's own distress level rating
Example no. 16
def get_concepts(text):
    concepts = nlu.analyze(
        text=text,
        features=Features(concepts=ConceptsOptions(limit=1))).get_result()
    return concepts['concepts']
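An illustrative call to get_concepts (the sentence is made up); each returned concept carries 'text', 'relevance', and a 'dbpedia_resource' link.

for concept in get_concepts("Machine learning models can overfit small training sets."):
    print(concept["text"], concept["relevance"])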
Example no. 17
def facebook_crawler():

    # Added because of the browser's permission pop-up
    option = Options()
    option.add_experimental_option(
        "prefs", {"profile.default_content_setting_values.notifications": 1})

    driver = webdriver.Chrome(
        executable_path="C:/hanium/han_fb/chromedriver.exe",
        chrome_options=option)

    driver.implicitly_wait(3)

    driver.get('https://www.facebook.com/')

    html = driver.page_source
    soup = BeautifulSoup(html, 'html.parser')

    ## Log in
    email = driver.find_element_by_xpath("//input[@name='email']")
    password = driver.find_element_by_xpath("//input[@name='pass']")
    btn = driver.find_element_by_xpath("//*[@id='u_0_b']")
    # Enter your email and password here
    email.send_keys("")
    password.send_keys("")
    btn.click()
    # Search for the keyword
    searchbox = driver.find_element_by_xpath(
        "//*[@id='mount_0_0']/div/div[1]/div[1]/div[2]/div[2]/div/div[1]/div/div[3]/label/input"
    )
    searchbox.send_keys("covid")
    time.sleep(3)
    searchbox.send_keys(Keys.RETURN)

    last_height = driver.execute_script("return document.body.scrollHeight")

    while True:
        # Scroll to the bottom of the page
        driver.execute_script(
            "window.scrollTo(0, document.body.scrollHeight);")

        time.sleep(3)

        new_height = driver.execute_script("return document.body.scrollHeight")
        if new_height == last_height:
            break
        last_height = new_height

    # For now, keep paging down so everything is loaded
    down_Scroll = 30
    for i in range(down_Scroll):
        body = driver.find_element_by_css_selector('body')
        body.send_keys(Keys.PAGE_DOWN)
        print(i)

    time.sleep(2)

    html = driver.page_source
    soup = BeautifulSoup(html, 'html.parser')

    scope = 20
    send_data = {}
    end_data2 = []
    # 0. Extract the author names
    author = soup.select(
        '#mount_0_0 > div > div:nth-child(1) > div.rq0escxv.l9j0dhe7.du4w35lb > div.rq0escxv.l9j0dhe7.du4w35lb > div > div > div.j83agx80.cbu4d94t.d6urw2fd.dp1hu0rb.l9j0dhe7.du4w35lb > div.rq0escxv.l9j0dhe7.du4w35lb.j83agx80.pfnyh3mw.jifvfom9.gs1a9yip.owycx6da.btwxx1t3.buofh1pr.dp1hu0rb.ka73uehy > div.rq0escxv.l9j0dhe7.du4w35lb.j83agx80.cbu4d94t.g5gj957u.d2edcug0.hpfvmrgz.rj1gh0hx.buofh1pr.dp1hu0rb > div > div > div > div > div > div > div > div > div > div > div > div > div.discj3wi.hv4rvrfc.qt6c0cv9.dati1w0a > div > div.hpfvmrgz.g5gj957u.buofh1pr.rj1gh0hx.o8rfisnq > div > div:nth-child(1) > span > span > a > span:nth-child(1)'
    )

    # 1. To pull out only the text, the child tag (date) has to be excluded ---> tried decompose() and a few other approaches but they did not work; not sure why

    text_data = soup.select(
        '#mount_0_0 > div > div:nth-child(1) > div.rq0escxv.l9j0dhe7.du4w35lb > div.rq0escxv.l9j0dhe7.du4w35lb > div > div > div.j83agx80.cbu4d94t.d6urw2fd.dp1hu0rb.l9j0dhe7.du4w35lb > div.rq0escxv.l9j0dhe7.du4w35lb.j83agx80.pfnyh3mw.jifvfom9.gs1a9yip.owycx6da.btwxx1t3.buofh1pr.dp1hu0rb.ka73uehy > div.rq0escxv.l9j0dhe7.du4w35lb.j83agx80.cbu4d94t.g5gj957u.d2edcug0.hpfvmrgz.rj1gh0hx.buofh1pr.dp1hu0rb > div > div > div > div > div > div > div > div > div > div > div > div > div.jb3vyjys.hv4rvrfc.ihqw7lf3.dati1w0a > a > div > div.hpfvmrgz.buofh1pr > span > span'
    )

    # Extract the timestamp
    date = soup.select(
        '#mount_0_0 > div > div:nth-child(1) > div.rq0escxv.l9j0dhe7.du4w35lb > div.rq0escxv.l9j0dhe7.du4w35lb > div > div > div.j83agx80.cbu4d94t.d6urw2fd.dp1hu0rb.l9j0dhe7.du4w35lb > div.rq0escxv.l9j0dhe7.du4w35lb.j83agx80.pfnyh3mw.jifvfom9.gs1a9yip.owycx6da.btwxx1t3.buofh1pr.dp1hu0rb.ka73uehy > div.rq0escxv.l9j0dhe7.du4w35lb.j83agx80.cbu4d94t.g5gj957u.d2edcug0.hpfvmrgz.rj1gh0hx.buofh1pr.dp1hu0rb > div > div > div > div > div > div > div > div > div > div > div > div > div.jb3vyjys.hv4rvrfc.ihqw7lf3.dati1w0a > a > div > div.hpfvmrgz.buofh1pr > span > span > span'
    )
    for a in range(scope):
        author2 = author[a].text

        text_data2 = text_data[a].text
        text_data3 = text_data2[9:]

        response = natural_language_understanding.analyze(
            text=text_data3,
            features=Features(entities=EntitiesOptions(emotion=False),
                              categories=CategoriesOptions(),
                              semantic_roles=SemanticRolesOptions(),
                              keywords=KeywordsOptions(
                                  emotion=False,
                                  sentiment=False))).get_result()
        # Default to "Unknown" and overwrite only if a Location entity is found
        send_data['location'] = "Unknown"
        for re in response['entities']:
            if re['type'] == "Location":
                send_data['location'] = re['text']

        for re1 in response['categories']:
            send_data['categorized'] = re1['label']
            send_data['score'] = re1['score']

        send_data['author'] = author2
        send_data['title'] = a
        send_data['contents'] = text_data3
        # send_data['created'] = text_data2.created_at
        send_data['published'] = datetime.datetime.now()

        dictionary_copy = send_data.copy()
        end_data2.append(dictionary_copy)

    # print(end_data2)

    return end_data2
Esempio n. 18
0
def get_song_metadata(artist, song_title):
    url = song_stripper(artist, song_title)
    response = natural_language_understanding.analyze(
        url=url, features=Features(metadata=MetadataOptions())).get_result()
    return json.dumps(response, indent=2)
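A hedged usage sketch: song_stripper (defined elsewhere) is assumed to return a page URL for the song, and natural_language_understanding is assumed to be a configured client as in the other examples; the artist/title values are only illustrative:

# Hypothetical call; metadata analysis returns fields such as title, authors and publication_date
metadata_json = get_song_metadata('Queen', 'Bohemian Rhapsody')
print(metadata_json)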
Esempio n. 19
0
authenticator = IAMAuthenticator(
    'izOZcF-yOaL5TEyRQQX5en3HtNYKlEUaoYTyhwJ3gnVv')
natural_language_understanding = NaturalLanguageUnderstandingV1(
    version='2020-08-01', authenticator=authenticator)

natural_language_understanding.set_service_url(
    'https://api.us-south.natural-language-understanding.watson.cloud.ibm.com')

# Iterate over each participant
for index, row in df.iterrows():
    i = 0
    results = []
    # Iterate over each answer for a given participant
    for col in row:
        if i == 0:  # Get the participant's name
            name = col
            i += 1
            continue

        response = natural_language_understanding.analyze(
            language='en',
            text=col,
            features=Features(entities=EntitiesOptions(emotion=True,
                                                       sentiment=True,
                                                       limit=2),
                              keywords=KeywordsOptions(emotion=True,
                                                       sentiment=True,
                                                       limit=2))).get_result()
        results.append(response)
    write_to_csv(name, results)
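The loop relies on a write_to_csv helper that is not shown in the snippet; a minimal hypothetical sketch of what it could look like, flattening the keyword results into one row per keyword (file naming and column choice are assumptions):

import csv

def write_to_csv(name, results):
    # Hypothetical helper: one row per extracted keyword, tagged with the participant's name
    with open(name + '_nlu.csv', 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(['participant', 'keyword', 'relevance', 'sentiment', 'emotion_joy'])
        for response in results:
            for kw in response.get('keywords', []):
                writer.writerow([
                    name,
                    kw['text'],
                    kw.get('relevance'),
                    kw.get('sentiment', {}).get('score'),
                    kw.get('emotion', {}).get('joy'),
                ])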
Esempio n. 20
0
def instagram_crawler():

    # Moves to the next post on the right after the first post has been opened (called repeatedly)
    def move_next(driver):                                                                          # click the right arrow to move to the adjacent post
        right = driver.find_element_by_css_selector('a._65Bje.coreSpriteRightPaginationArrow')
        right.click()
        time.sleep(1)
    def insta_searching(word):                                                                      # builds the hashtag search URL for the given keyword
        url = 'https://www.instagram.com/explore/tags/' + word
        return url

    def select_recent_post(driver):
        first = driver.find_elements_by_css_selector('div._9AhH0')                                  # find_elements_by_css_selector returns a list; pick one of the recent posts
        first[9].click()
        time.sleep(3)  # wait 3 seconds for the post to load

    def get_content(driver):                                                                        # scrapes the data from the currently open post
        html = driver.page_source
        soup = BeautifulSoup(html, 'lxml')
        data = {}
        try:  # post body
            data['content'] = soup.select('div.C4VMK > span')[0].text
        except:
            data['content'] = ''
        data['tags'] = re.findall(r'#[^\s#,\\]+', data['content'])
        try:  # post date (GMT)
            data['date'] = soup.select('time._1o9PC.Nzb55')[0]['datetime'][:10]
        except:
            data['date'] = ''
        try:  # like count
            data['like'] = int(soup.select('div.Nm9Fw > button')[0].text[4:-1])
        except:
            data['like'] = 0
        try:  # place, only if one is attached
            data['place'] = soup.select('div.JF9hh')[0].text
        except:
            data['place'] = ''
        try:  # save the user id
            data['user'] = soup.select('div.e1e1d')[0].text
        except:
            data['user'] = ''
        try:  # save the post image URL
            data['imgUrl'] = soup.select('div.KL4Bh>img')[0]['src']
            print(data['imgUrl'])
        except:
            data['imgUrl'] = ''

        return data

    # Log-in helper
    def instagram_login(id, password):
        login_section = '//*[@id="react-root"]/section/nav/div[2]/div/div/div[3]/div/span/a[1]/button'         # the layout differs each time the browser opens, so this needs exception handling
        driver.find_element_by_xpath(login_section).click()
        time.sleep(3)
        elem_login = driver.find_element_by_name('username')
        elem_login.clear()
        elem_login.send_keys(id)
        elem_login = driver.find_element_by_name('password')
        elem_login.clear()
        elem_login.send_keys(password)
        time.sleep(2)
        xpath = """//*[@id="loginForm"]/div/div[3]"""
        driver.find_element_by_xpath(xpath).click()
        time.sleep(4)
        xpath1 = """//*[@id="react-root"]/section/main/div/div/div/div/button"""
        driver.find_element_by_xpath(xpath1).click()
        time.sleep(4)

    def ibm_watson_set():
        authenticator = IAMAuthenticator('R_CsLMA0DRULvscHYVRZjmHLaF6uvWjScH_T-AefknQ7')
        natural_language_understanding = NaturalLanguageUnderstandingV1(version='2019-07-12',
                                                                        authenticator=authenticator)
        natural_language_understanding.set_service_url(
            'https://api.kr-seo.natural-language-understanding.watson.cloud.ibm.com/instances/7b8701eb-a403-429f-b9c6-e384776c70d3')

        return natural_language_understanding

    #-------------------------------------------------------------------------------------------------------------------------------------------#

    send_data = {}
    end_data = []
    results = []                                                                                        # holds the crawling results
    #target = 9                                                                                          # number of posts to crawl

    driver = webdriver.Chrome("C:/Users/yechan/hanium/main/chromedriver.exe")                #진황 로컬 파일경로 #driver = webdriver.Chrome("C:\\Users\\chan\\Desktop\\Coding\\cmder\\main\\chromedriver.exe")       #찬우 로컬 파일경로
    keyword = '가락동맛집'                                                                                     #검색키워드
    url = insta_searching(keyword)
    driver.get(url)
    time.sleep(4)

    natural_language_understanding = ibm_watson_set()
    instagram_login('jiiinang', 'chl2425!?')                                                            # call the login helper with the account id and password
    select_recent_post(driver)

    #for i in range(target):                                                # when crawling a fixed number of posts
    #    data = get_content(driver)  # fetch the post data
    #    results.append(data)
    #    if i != (target - 1):
    #        move_next(driver)

    while 1:                                                                        # batch mode: keep crawling while posts from the target date keep appearing
         data = get_content(driver)  # fetch the post data
         results.append(data)

         if data['date'] == "2020-10-29":        #한국 업로드 시간과 해외 업로드시간이 차이가 나서 기준을 정해야 할듯 현 기준은 해외기준은 그리니치 표준시로 되어있음
             move_next(driver)
         else:
             break

    for i, insta in enumerate(results):
        send_data['location'] = "Unknown"
        send_data['categorized'] = ""
        send_data['score'] = 0

        response = natural_language_understanding.analyze(
            text=insta['content'],                                  # hashtags are still included in the content *fix later
            features=Features(
                entities=EntitiesOptions(emotion=False),
                categories=CategoriesOptions(),
                semantic_roles=SemanticRolesOptions(),
                keywords=KeywordsOptions(emotion=False, sentiment=False)
            )
        ).get_result()

        for res in response['entities']:
            if res['type'] == "Location":
                send_data['location'] = res['text']

        for res1 in response['categories']:
            send_data['categorized'] = res1['label']
            send_data['score'] = res1['score']

        send_data['author'] = insta['user']
        send_data['title'] = insta['user']
        send_data['contents'] = insta['content']

        dictionary_copy = send_data.copy()
        end_data.append(dictionary_copy)
        print(i)

    return end_data
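Both crawlers (Esempio n. 17 and n. 20) use the find_element_by_* / find_elements_by_* helpers, which were deprecated and later removed in Selenium 4; a hedged sketch of the equivalent calls with the By locator API (selectors reused from the code above, driver setup simplified):

from selenium import webdriver
from selenium.webdriver.common.by import By

driver = webdriver.Chrome()  # in Selenium 4 the driver path is handled via a Service object

# Equivalent lookups for two of the selectors used above
email = driver.find_element(By.XPATH, "//input[@name='email']")
posts = driver.find_elements(By.CSS_SELECTOR, 'div._9AhH0')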
Esempio n. 21
0
def relevancy_dict(chunk):
    service = NLU(version='2018-03-16',
                  url='https://gateway.watsonplatform.net/natural-language-understanding/api',
                  iam_apikey='#########')
    response = service.analyze(
        text=chunk,
        features=Features(entities=EntitiesOptions(),
                          keywords=KeywordsOptions())).get_result()
    analysis = json.dumps(response, indent=2)
    return json.loads(analysis)
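relevancy_dict uses the older constructor signature (url=/iam_apikey=); a sketch of the same function written with the IAMAuthenticator pattern used by the newer examples in this collection (the API key stays a placeholder, and get_result() already returns a parsed dict, so no JSON round-trip is needed):

from ibm_watson import NaturalLanguageUnderstandingV1
from ibm_cloud_sdk_core.authenticators import IAMAuthenticator
from ibm_watson.natural_language_understanding_v1 import Features, EntitiesOptions, KeywordsOptions

def relevancy_dict(chunk):
    # Placeholder key, same service URL as the original
    authenticator = IAMAuthenticator('#########')
    service = NaturalLanguageUnderstandingV1(version='2018-03-16', authenticator=authenticator)
    service.set_service_url('https://gateway.watsonplatform.net/natural-language-understanding/api')
    response = service.analyze(
        text=chunk,
        features=Features(entities=EntitiesOptions(), keywords=KeywordsOptions())).get_result()
    return response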
Esempio n. 22
0
import json
from ibm_watson import NaturalLanguageUnderstandingV1
from ibm_cloud_sdk_core.authenticators import IAMAuthenticator
from ibm_watson.natural_language_understanding_v1 import Features, EmotionOptions

authenticator = IAMAuthenticator(
    'cpAkOt7XOwl4j31ALIc1EAVyxeUffuNgSax52B-DIIv5')
natural_language_understanding = NaturalLanguageUnderstandingV1(
    version='2020-08-01', authenticator=authenticator)

natural_language_understanding.set_service_url(
    'https://api.kr-seo.natural-language-understanding.watson.cloud.ibm.com/instances/25f8b00d-86f0-482a-8b89-7a056f5575da'
)

response = natural_language_understanding.analyze(
    html=
    "<html><head><title>Fruits</title></head><body><h1>Apples and Oranges</h1><p>I love apples! I don't like oranges.</p></body></html>",
    features=Features(emotion=EmotionOptions(
        targets=['apples', 'oranges']))).get_result()

print(json.dumps(response, indent=2))
Esempio n. 23
0
            rep = [data['results'][0]['geometry']['location']['lat'],data['results'][0]['geometry']['location']['lng']]

            return f'{rep[0]}~{rep[1]}'

        else:
            print('TOO FAR!!!')
            return None

    else:
        return None


# Text parsing constants
EMERGENCIES_LUT = ['fire', 'earthquake', 'flood', 'trapped', 'stuck', 'injured', 'drowning', 'drown']
DISPATCH_LUT = ['helicopter', 'helo', 'backup', 'medivac']
FEATURES = Features(keywords=KeywordsOptions())


def parse_sentance(msg, lat, lon):
    '''
    Parse an input sentence using a combination of IBM Watson's smart keyword searching,
    vanilla pattern recognition for emergencies and dispatches, and the Google Maps API
    for locations.
    '''
    natural_language_understanding = NaturalLanguageUnderstandingV1(
                version='2019-07-12',
                iam_apikey='4q60I1qoSwbml9Heu_afwGa0uzw8iAo175qLdOJjX4J-',
                url='https://gateway-syd.watsonplatform.net/natural-language-understanding/api'
            )

    response = natural_language_understanding.analyze(text=msg, features=FEATURES).get_result()
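The snippet breaks off right after the analyze call; a hypothetical helper, in the spirit of the docstring, showing how the Watson keywords could be cross-checked against EMERGENCIES_LUT and DISPATCH_LUT (this continuation is an assumption, not the original implementation):

def match_luts(response):
    # Hypothetical: collect any emergency/dispatch terms found among Watson's keywords
    keyword_texts = [kw['text'].lower() for kw in response['keywords']]
    emergencies = [term for term in EMERGENCIES_LUT
                   if any(term in kw for kw in keyword_texts)]
    dispatches = [term for term in DISPATCH_LUT
                  if any(term in kw for kw in keyword_texts)]
    return emergencies, dispatches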
Esempio n. 24
0
def main(args):

    # Parse incoming request headers
    _c_type, p_dict = parse_header(
        args['__ow_headers']['content-type']
    )
    
    # Decode body (base64)
    decoded_string = b64decode(args['__ow_body'])

    # Set Headers for multipart_data parsing
    p_dict['boundary'] = bytes(p_dict['boundary'], "utf-8")
    p_dict['CONTENT-LENGTH'] = len(decoded_string)
    
    # Parse incoming request data
    multipart_data = parse_multipart(
        BytesIO(decoded_string), p_dict
    )
   

    try:
        # Build flac file from stream of bytes
        fo = open("audio_sample.flac", 'wb')
        fo.write(multipart_data.get('audio')[0])
        fo.close()
        teste=False
    except: 
        teste=True
    #teste = multipart_data.items
    # Get the car model
    carro = multipart_data.get('car')[0]
    
    if teste == False:

        # Basic Authentication with Watson STT API
        stt_authenticator = BasicAuthenticator(
            'apikey',
            'apikey'
        )

        # STT authentication
        # Construct a Watson STT client with the authentication object
        stt = SpeechToTextV1(authenticator=stt_authenticator)

        # Set the URL endpoint for your Watson STT client
        stt.set_service_url('https://api.us-south.speech-to-text.watson.cloud.ibm.com')

        # Read audio file and call Watson STT API:
        with open(
            os.path.join(
                os.path.dirname(__file__), './.',
                'audio_sample.flac'
            ), 'rb'
        ) as audio_file:
            # Transcribe the audio.flac with Watson STT
            # Recognize method API reference: 
            # https://cloud.ibm.com/apidocs/speech-to-text?code=python#recognize
            stt_result = stt.recognize(
                audio=audio_file,
                content_type='audio/flac',
                model='pt-BR_BroadbandModel'
            ).get_result()

        authenticator_nlu = BasicAuthenticator(
            'apikey',
            'apikey'
        )
        natural_language_understanding = NaturalLanguageUnderstandingV1(
            version='2020-09-16', authenticator=authenticator_nlu)
        natural_language_understanding.set_service_url(
            'https://api.us-south.natural-language-understanding.watson.cloud.ibm.com')

        texto_stt = stt_result['results'][0]['alternatives'][0]['transcript']
        try:
            nlu_resp = natural_language_understanding.analyze(
                text=texto_stt,
                features=Features(entities=EntitiesOptions(
                    sentiment=True,
                    model='54f2d12a-54fb-4683-b89f-c76c8b93de3f'))).get_result()
        except ApiException as ex:
            print("Method failed with status code " + str(ex.code) + ": " + ex.message)

    elif teste == True:

        # Get the text
        texto = multipart_data.get('text')[0]
        carro = multipart_data.get('car')[0]

        authenticator_nlu = BasicAuthenticator(
            'apikey',
            'apikey'
        )
        natural_language_understanding = NaturalLanguageUnderstandingV1(
            version='2020-09-16', authenticator=authenticator_nlu)
        natural_language_understanding.set_service_url(
            'https://api.us-south.natural-language-understanding.watson.cloud.ibm.com')

        # Defining the features
        try:
            nlu_resp = natural_language_understanding.analyze(
                text=texto,
                features=Features(entities=EntitiesOptions(
                    sentiment=True,
                    model='54f2d12a-54fb-4683-b89f-c76c8b93de3f'))).get_result()
        except ApiException as ex:
            print("Method failed with status code " + str(ex.code) + ": " + ex.message)

 
    sent_rec=[]
    sent_json=[]
    score_rec=[]
    score_json=[]
    ent_rec=[]
    ent_json=[]
    ment_json=[]
    # Collect the entities with negative sentiment
    try:
        for x in range(50):
            aux=nlu_resp['entities'][x]['sentiment']['label']
            sent_json.append(nlu_resp['entities'][x]['sentiment']['label'])
            score_json.append(nlu_resp['entities'][x]['sentiment']['score'])
            ent_json.append(nlu_resp['entities'][x]['type'])
            ment_json.append(nlu_resp['entities'][x]['text'])
        
            #print(aux)
            if  aux != 'neutral':
                if aux !='positive':
                    sent_rec.append(nlu_resp['entities'][x]['sentiment']['label'])
                    score_rec.append(nlu_resp['entities'][x]['sentiment']['score'])
                    ent_rec.append(nlu_resp['entities'][x]['type'])
                    #print("entrou")
        
    except:
        saiu = 1  # fewer than 50 entities were returned; stop collecting

    # list of cars we can recommend
    lista= ["FIAT 500","DUCATO","ARGO","FIORINO","MAREA","RENEGADE","CRONOS"]
    lista_seg_op=["TORO","ARGO","DUCATO","FIAT 500","CRONOS","CRONOS","ARGO"]
    lista_prioridade=["SEGURANCA","CONSUMO","DESEMPENHO","MANUTENCAO","CONFORTO","DESIGN","ACESSORIOS"]

    for x in range(len(lista)):
        if carro == lista[x]:
            lista[x]=lista_seg_op[x]

    # Deciding which car to recommend
    if sent_rec !=[]:

        #entidade.append("MANUTENCAO")
        #Sentimento.append(-1)
        #cont=0
        entidade_aux=0
        sent_aux=0

        for x in range(len(score_rec)):
            dif=abs(sent_aux-score_rec[x])

            if dif > 0.1:
                if score_rec[x] < sent_aux:
                    sent_aux= score_rec[x]
                    entidade_aux=ent_rec[x]
                    print(sent_aux,entidade_aux)
            elif dif < 0.1:
            # Tie-break
                # print("here")
                for y in range(len(lista)):
                    if entidade_aux == lista_prioridade[y]:
                        sent_aux=sent_aux
                        entidade_aux=entidade_aux
                    elif ent_rec[x] == lista_prioridade[y]:
                        sent_aux= score_rec[x]
                        entidade_aux=ent_rec[x]
        
        for x in range(len(lista)):
            if lista_prioridade[x] == entidade_aux:
                sugest=lista[x]
    else:
        sugest=""

    list_json=[]
    for x in range(len(sent_json)):
        list_json.append({"entity":ent_json[x], "sentiment": score_json[x],"mention": ment_json[x]})

    return {
        "recommendation": sugest,
        "entities": list_json
    }
Esempio n. 25
0
def n(site='www.wsj.com/news/markets'):
    response = natural_language_understanding.analyze(
        url=site,
        features=Features(sentiment=SentimentOptions(
            targets=['stocks']))).get_result()
    return response
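A hedged usage sketch for n(): with SentimentOptions(targets=['stocks']), the result carries both a document-level score and a per-target entry, assuming the target string actually occurs on the page:

result = n('www.wsj.com/news/markets')

# Document-level sentiment
print(result['sentiment']['document']['label'],
      result['sentiment']['document']['score'])

# Targeted sentiment for 'stocks', if it was found in the page text
for target in result['sentiment'].get('targets', []):
    print(target['text'], target['score'])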
Esempio n. 26
0
def prioritize_health(sentence, model):
    priority_dict = {}
    with open("priority_dict.csv", "r") as f:
        for line in f:
            # print(line.strip().split(","))
            comma_split = line.strip().split(",")
            symptom = ",".join(comma_split[:-1])
            priority = int(comma_split[-1])
            print(symptom, priority)
            priority_dict[symptom] = priority
    symptoms = priority_dict.keys()
    natural_language_understanding = NaturalLanguageUnderstandingV1(
        version='2019-07-12',
        iam_apikey=ibm_key,
        url=
        'https://gateway.watsonplatform.net/natural-language-understanding/api'
    )
    result = natural_language_understanding.analyze(
        language="en",
        text=sentence,
        features=Features(keywords=KeywordsOptions())).get_result()
    print(result)
    keywords = [x['text'] for x in result['keywords']]
    print(keywords)
    # find keywords in the sentence; for each keyword, find the nearest symptom based on the average cosine similarity
    # print(symptoms)
    closest_symptoms = []  #add one for each keyword
    best_symptom = None
    for cur_keyword in keywords:
        best_avg_sim = 0
        print("Finding closest match for keyword={}".format(cur_keyword))
        test_keyword = cur_keyword.split(" ")[-1]
        print("Test keyword={}".format(test_keyword))
        for cur_symptom in symptoms:
            symptom_tokens = [
                x.lower() for x in nltk.word_tokenize(cur_symptom)
                if not is_useless(x) and x.isalpha()
            ]
            total_sim = 0
            total_cnt = 0
            # print("Symptom={}".format(cur_symptom))
            # print("Symptom tokens={}".format(symptom_tokens))
            for token in symptom_tokens:
                # print("Token={}".format(token))
                if token not in model.vocab or token == test_keyword:
                    # print("Token not found in vocab, skipping")
                    continue
                # compute similarity of keyword and token
                cur_sim = model.similarity(test_keyword, token)
                # print("cur_sim={}".format(cur_sim))
                if cur_sim < -0.2 or cur_sim > 0.2:
                    total_sim += cur_sim
                    total_cnt += 1
            if total_cnt:
                avg_sim = float(total_sim) / total_cnt
                print("total_sim={},total_cnt={},avg_sim={}".format(
                    total_sim, total_cnt, avg_sim))
                if avg_sim > best_avg_sim:
                    best_avg_sim = avg_sim
                    best_symptom = cur_symptom
        print(best_avg_sim, best_symptom)
    if best_symptom is not None:
        priority_score = priority_dict[best_symptom]
    else:
        priority_score = 0
    res = (keywords, priority_score)
    print("Result={}".format(res))
    return res
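prioritize_health expects a word-embedding model exposing .vocab and .similarity(), which matches the gensim KeyedVectors interface prior to gensim 4.0; a hedged setup sketch (the embedding file name and input sentence are placeholders, and ibm_key must already be set):

import gensim

# Assumes gensim < 4.0, where KeyedVectors still exposes .vocab
model = gensim.models.KeyedVectors.load_word2vec_format(
    'GoogleNews-vectors-negative300.bin', binary=True)  # placeholder embedding file

keywords, priority = prioritize_health('I have a severe headache and chest pain', model)
print(keywords, priority)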
Esempio n. 27
0
import json
from ibm_watson import NaturalLanguageUnderstandingV1
from ibm_watson.natural_language_understanding_v1 import Features, EntitiesOptions, KeywordsOptions
from ibm_cloud_sdk_core.authenticators import IAMAuthenticator

authenticator = IAMAuthenticator('0WIgFUuKmw3ZU2-sNCLhCjbJEufJWQVY5tx5_GEjCmjj')
service = NaturalLanguageUnderstandingV1(
    version='2018-03-16',
    authenticator=authenticator)
service.set_service_url('https://gateway.watsonplatform.net/natural-language-understanding/api')

response = service.analyze(
    url='http://www.cronicasdeleste.com.uy/Noticias/germ%C3%A1n-cardoso-y-eduardo-elinger-denuncian-que-les-hackearon-las-cuentas.html?no_redirect=true',
    features=Features(entities=EntitiesOptions(),
                      keywords=KeywordsOptions())).get_result()

print(json.dumps(response, indent=2))
Esempio n. 28
0
def categories(text):
    response = natural_language_understanding.analyze(
        text=text,
        features=Features(categories=CategoriesOptions(limit=1))).get_result()

    return json.dumps(response["categories"])
Esempio n. 29
0
import json
from ibm_watson import NaturalLanguageUnderstandingV1
from ibm_cloud_sdk_core.authenticators import IAMAuthenticator
from ibm_watson.natural_language_understanding_v1 import Features, SentimentOptions

authenticator = IAMAuthenticator(
    'PtJi8o9UyO8koiyqOJ-JOQPDFURQI9H8lKI8WzQxHc6Y')
natural_language_understanding = NaturalLanguageUnderstandingV1(
    version='2019-07-12', authenticator=authenticator)

natural_language_understanding.set_service_url(
    'https://api.eu-gb.natural-language-understanding.watson.cloud.ibm.com/instances/17e005b5-ebde-489d-8483-60fbd1ea1cfd'
)

for i in range(936):
    # Use a context manager so each review file is closed after reading
    with open("Uber_Ride_Reviews" + str(i) + ".txt", "r") as f:
        text = f.readline()

    sentiment_analysis = natural_language_understanding.analyze(
        text=text,
        features=Features(sentiment=SentimentOptions())).get_result()
    print(json.dumps(sentiment_analysis, indent=2))
Esempio n. 30
0
    def analyze(self, input):
        return self.service.analyze(
            text=input,
            features=Features(entities=EntitiesOptions(),
                              keywords=KeywordsOptions(),
                              sentiment=SentimentOptions())).get_result()
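This last example is a method cut out of a wrapper class; a hypothetical sketch of the class it could live in, assuming self.service is a configured NaturalLanguageUnderstandingV1 client (class name, parameters and credentials are placeholders; the analyze method above would sit inside it):

from ibm_watson import NaturalLanguageUnderstandingV1
from ibm_cloud_sdk_core.authenticators import IAMAuthenticator

class WatsonAnalyzer:
    # Hypothetical wrapper; the analyze() method shown above belongs to this class
    def __init__(self, api_key, service_url):
        self.service = NaturalLanguageUnderstandingV1(
            version='2019-07-12', authenticator=IAMAuthenticator(api_key))
        self.service.set_service_url(service_url)

# analyzer = WatsonAnalyzer('your-api-key', 'https://api.us-south.natural-language-understanding.watson.cloud.ibm.com')
# result = analyzer.analyze('I love apples! I do not like oranges.')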