Example #1
import json
from ibm_watson import NaturalLanguageUnderstandingV1
from ibm_watson.natural_language_understanding_v1 import Features, SemanticRolesOptions


def test_example():
    natural_language_understanding = NaturalLanguageUnderstandingV1(
        version='2019-07-12',
        iam_apikey='INJBZJ_fupvtsCswpI9UUOBZZmOYp6vHkHPEAZX_WU31',
        url='https://gateway.watsonplatform.net/natural-language-understanding/api')

    # Analyze two plain-text snippets for semantic roles.
    response1 = natural_language_understanding.analyze(
        text='To use MPN support contracts, you need the Access ID and Contract ID for the support contract.',
        features=Features(semantic_roles=SemanticRolesOptions())).get_result()
    #print(json.dumps(response1, indent=2))

    response2 = natural_language_understanding.analyze(
        text='Demonstrate your proven expertise in delivering quality solutions in one or more specialized areas of business.',
        features=Features(semantic_roles=SemanticRolesOptions())).get_result()
    #print(json.dumps(response2, indent=2))

    # Analyze a web page by URL instead of raw text.
    response3 = natural_language_understanding.analyze(
        url='https://docs.microsoft.com/en-us/partner-center/',
        features=Features(semantic_roles=SemanticRolesOptions())).get_result()
    print(json.dumps(response3, indent=2))
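Each semantic_roles result is a list of role dicts; subject, action, and object are optional per role and each carries a 'text' field (the same shape the later examples rely on). A minimal sketch of unpacking one of the responses above, assuming the response1 dict from this example:

# Sketch: unpack subject/action/object triples from a semantic_roles response.
# .get() guards against roles that lack one of the three parts.
for role in response1.get('semantic_roles', []):
    subject = role.get('subject', {}).get('text', '')
    action = role.get('action', {}).get('text', '')
    obj = role.get('object', {}).get('text', '')
    print(subject, '--', action, '->', obj)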
Example #2
from ibm_watson.natural_language_understanding_v1 import Features, KeywordsOptions, SemanticRolesOptions


def analyzeSampleMessages_Default(questions_problems_text, nlu):
    results_list = []
    for message in questions_problems_text:
        result = nlu.analyze(
            text=message,
            features=Features(
                keywords=KeywordsOptions(),
                semantic_roles=SemanticRolesOptions())).get_result()
        # Collect keyword texts and the normalized action verbs, when present.
        actions_arr = []
        keywords_arr = []
        for keyword in result["keywords"]:
            keywords_arr.append(keyword["text"])
        if "semantic_roles" in result:
            for semantic_result in result["semantic_roles"]:
                if "action" in semantic_result:
                    actions_arr.append(semantic_result["action"]["normalized"])
        results_list.append({
            "header": "-------------------------------------------------------------",
            "message": message,
            "actions": actions_arr,
            "keywords": keywords_arr,
            "spacer": ""
        })
    return results_list
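A hedged usage sketch for the helper above; the sample messages are hypothetical and nlu is assumed to be a client built as in Example #1:

# Usage sketch (assumed inputs): run the helper over two sample messages.
sample_messages = [
    'How do I reset my password?',
    'Create user accounts and set permissions.',
]
for block in analyzeSampleMessages_Default(sample_messages, nlu):
    print(block['header'])
    print(block['message'])
    print('actions:', block['actions'])
    print('keywords:', block['keywords'])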
Example #3
import json
from ibm_watson import NaturalLanguageUnderstandingV1
from ibm_watson.natural_language_understanding_v1 import Features, RelationsOptions, SemanticRolesOptions


def test_watson_relations_semantics():
    natural_language_understanding = NaturalLanguageUnderstandingV1(
        version='2019-07-12',
        iam_apikey='INJBZJ_fupvtsCswpI9UUOBZZmOYp6vHkHPEAZX_WU31',
        url='https://gateway.watsonplatform.net/natural-language-understanding/api')
    string = 'Add licenses or services to an existing subscription'

    # read_output_relations_semantics, output, and Tools are module-level
    # values (see the stand-in sketch after this example); each row's first
    # column is the URL to analyze.
    for i in read_output_relations_semantics:
        url_link = i[0]
        print(url_link)

        # Top relations; this is generic, straight from Watson's current
        # library, but we can customize later.
        relations_response = natural_language_understanding.analyze(
            url=url_link,
            features=Features(relations=RelationsOptions())).get_result()
        output.append(json.dumps(relations_response, indent=2))
        print('RELATIONS')
        print(json.dumps(relations_response, indent=2))

        semantics_response = natural_language_understanding.analyze(
            url=url_link,
            features=Features(
                semantic_roles=SemanticRolesOptions())).get_result()
        output.append(json.dumps(semantics_response, indent=2))
        print('SEMANTICS')
        print(json.dumps(semantics_response, indent=2))

    Tools.write_csv_data('./watson_output_relations_semantics.csv', output)
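The test iterates over module-level values that are not shown; a minimal stand-in, assuming each row's first column is the URL to analyze (the URL here is borrowed from Example #1):

# Hypothetical stand-in for the module-level inputs of the test above.
read_output_relations_semantics = [
    ['https://docs.microsoft.com/en-us/partner-center/'],
]
output = []  # accumulates the JSON strings written to the CSV at the end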
Example #4
from ibm_watson.natural_language_understanding_v1 import Features, SemanticRolesOptions
from spacy.matcher import Matcher
# Assumes a module-level spaCy pipeline "nlp" and Watson client
# "natural_language_understanding", as in the examples above.


def noun_phrase_fallback(user_input):
    """Fallback: collect noun tokens with a spaCy Matcher when Watson
    returns no semantic roles or the input is too short to analyze."""
    matcher = Matcher(nlp.vocab)
    pattern = [{'POS': 'NOUN'}]
    matcher.add('NOUN_PATTERN', None, pattern)  # spaCy 2.x Matcher API
    doc = nlp(user_input)
    subs = [doc[start:end].text for match_id, start, end in matcher(doc)]
    return ' '.join(subs)


def keyword_extraction(user_input):
    user_input = user_input.strip()
    splitted = user_input.split()
    subject = ''
    if len(splitted) > 3:
        keywords = natural_language_understanding.analyze(
            text=user_input,
            features=Features(
                semantic_roles=SemanticRolesOptions())).get_result()
        semantic_roles = keywords["semantic_roles"]
        if len(semantic_roles) != 0:
            # First-person sentences carry the topic in the object;
            # otherwise take the subject.
            role = "object" if "i" in splitted else "subject"
            subject = semantic_roles[0][role]["text"]
        else:
            subject = noun_phrase_fallback(user_input)
    else:
        subject = noun_phrase_fallback(user_input)

    list_of_sub = subject.split()
    return list_of_sub
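A usage sketch with a hypothetical question; the Watson client and spaCy pipeline are assumed to exist at module level, as noted above:

# Usage sketch: extract topic tokens from a short support question.
print(keyword_extraction('How do I add licenses to an existing subscription?'))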
Example #5
from ibm_cloud_sdk_core import ApiException
from ibm_watson.natural_language_understanding_v1 import Features, SemanticRolesOptions

def analyze_action(text):
    service = authenticate()
    try:
        response = service.analyze(
            text=text,
            features=Features(
                semantic_roles=SemanticRolesOptions())).get_result()
        raw_resp = response['semantic_roles'][-1]
        action = raw_resp['action']['text']
        if 'object' in raw_resp:
            action += ' ' + raw_resp['object']['text']
    except ApiException as e:
        raise ActionNotFoundException(e) from e
    return action
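analyze_action depends on an authenticate() helper and an ActionNotFoundException class that are not shown; the exception is project-specific. A plausible sketch of authenticate(), modeled on the IAMAuthenticator pattern and environment variable names used in Example #7 below:

# Hypothetical authenticate() helper, modeled on Example #7.
import os
from ibm_watson import NaturalLanguageUnderstandingV1
from ibm_cloud_sdk_core.authenticators import IAMAuthenticator

def authenticate():
    authenticator = IAMAuthenticator(os.environ['IBM_API_KEY'])
    service = NaturalLanguageUnderstandingV1(version='2019-07-12',
                                             authenticator=authenticator)
    service.set_service_url(os.environ['IBM_SERVICE_URL'])
    return service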
Example #6
from ibm_watson.natural_language_understanding_v1 import (
    Features, CategoriesOptions, ConceptsOptions, EmotionOptions,
    EntitiesOptions, KeywordsOptions, SemanticRolesOptions, SentimentOptions)


def analyze_using_NLU(analysistext):
    """ Extract results from Watson Natural Language Understanding for each news item
    """
    res = dict()
    response = natural_language_understanding.analyze(
        text=analysistext,
        features=Features(sentiment=SentimentOptions(),
                          entities=EntitiesOptions(),
                          keywords=KeywordsOptions(),
                          emotion=EmotionOptions(),
                          concepts=ConceptsOptions(),
                          categories=CategoriesOptions(),
                          semantic_roles=SemanticRolesOptions())).get_result()
    res['results'] = response
    return res
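A usage sketch with a hypothetical news snippet; since all seven features were requested, the parsed response carries keys such as 'sentiment' and 'keywords':

# Usage sketch (hypothetical input text).
item = analyze_using_NLU('IBM announced new Watson features today.')
print(item['results']['sentiment'])
print(item['results']['keywords'])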
Example #7
def _make_request():
    import os
    from ibm_watson import NaturalLanguageUnderstandingV1
    from ibm_cloud_sdk_core.authenticators import IAMAuthenticator
    from ibm_watson.natural_language_understanding_v1 import Features, CategoriesOptions, ConceptsOptions, EmotionOptions, EntitiesOptions, KeywordsOptions, \
        MetadataOptions, RelationsOptions, SemanticRolesOptions, SentimentOptions, SyntaxOptions, SyntaxOptionsTokens

    # Retrieve the API key for authentication
    apikey = os.environ.get("IBM_API_KEY")
    if apikey is None:
        raise ValueError(
            "Expected apikey in the environment variable 'IBM_API_KEY'")

    # Get the service URL for your IBM Cloud instance
    ibm_cloud_service_url = os.environ.get("IBM_SERVICE_URL")
    if ibm_cloud_service_url is None:
        raise ValueError(
            "Expected IBM cloud service URL in the environment variable 'IBM_SERVICE_URL'")

    # Initialize the authenticator for making requests
    authenticator = IAMAuthenticator(apikey)
    natural_language_understanding = NaturalLanguageUnderstandingV1(
        version='2019-07-12', authenticator=authenticator)
    natural_language_understanding.set_service_url(ibm_cloud_service_url)

    response = natural_language_understanding.analyze(
        url="https://raw.githubusercontent.com/CODAIT/text-extensions-for-pandas/master/resources/holy_grail.txt",
        return_analyzed_text=True,
        features=Features(
            #categories=CategoriesOptions(limit=3),
            #concepts=ConceptsOptions(limit=3),
            #emotion=EmotionOptions(targets=['grail']),
            entities=EntitiesOptions(sentiment=True),
            keywords=KeywordsOptions(sentiment=True, emotion=True),
            #metadata=MetadataOptions(),
            relations=RelationsOptions(),
            semantic_roles=SemanticRolesOptions(),
            #sentiment=SentimentOptions(targets=['Arthur']),
            syntax=SyntaxOptions(sentences=True,
                                 tokens=SyntaxOptionsTokens(
                                     lemma=True,
                                     part_of_speech=True))  # Experimental
        )).get_result()

    return response
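A usage sketch, assuming IBM_API_KEY and IBM_SERVICE_URL are exported; return_analyzed_text=True makes the service echo the fetched text under 'analyzed_text':

# Usage sketch: fetch and inspect the analyzed transcript.
result = _make_request()
print(result['analyzed_text'][:200])
print(len(result['semantic_roles']), 'semantic roles found')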
Example #8
from ibm_watson import NaturalLanguageUnderstandingV1
from ibm_watson.natural_language_understanding_v1 import (
    Features, CategoriesOptions, ConceptsOptions, EntitiesOptions,
    KeywordsOptions, RelationsOptions, SemanticRolesOptions)
# "config" is a project-specific module holding the IBM credentials.


def analyze(url):
    service = NaturalLanguageUnderstandingV1(
        version=config.Config.IBM_VERSION,
        ## url is optional, and defaults to the URL below. Use the correct URL for your region.
        url=config.Config.IBM_URL,
        iam_apikey=config.Config.IBM_API_KEY)

    response = service.analyze(
        url=url,
        # text='what is the application of NLP in web page search?',
        features=Features(categories=CategoriesOptions(),
                          concepts=ConceptsOptions(limit=10),
                          entities=EntitiesOptions(),
                          relations=RelationsOptions(),
                          semantic_roles=SemanticRolesOptions(),
                          keywords=KeywordsOptions()
                          ),
        return_analyzed_text=True,
        clean=True
    ).get_result()

    return response
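A usage sketch with a URL borrowed from Example #1; the response is the parsed dict, so each requested feature appears as a top-level key:

# Usage sketch: analyze a documentation page and list its categories.
result = analyze('https://docs.microsoft.com/en-us/partner-center/')
for category in result['categories']:
    print(category['label'], category['score'])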
Example #9
import networkx as nx
from ibm_watson.natural_language_understanding_v1 import (
    Features, EntitiesOptions, KeywordsOptions, RelationsOptions, SemanticRolesOptions)


def compute_all_features(text):
    response = natural_language_understanding.analyze(
        text=text,
        features=Features(entities=EntitiesOptions(),
                          keywords=KeywordsOptions(sentiment=True, emotion=True),
                          semantic_roles=SemanticRolesOptions(keywords=True, entities=True),
                          relations=RelationsOptions()),
        language='en').get_result()

    # Map each keyword to its sentiment score, emotion scores, and relevance.
    key_sent = {}
    key_emotion = {}
    keylist_rel = {}
    for words in response['keywords']:
        key_sent[words['text']] = words['sentiment']['score']
        key_emotion[words['text']] = words['emotion']
        keylist_rel[words['text']] = words['relevance']

    # Build a graph whose nodes are the most relevant keywords found inside
    # subject and object phrases, with edges joining subject to object.
    G = nx.Graph()
    sub = None
    obj = None

    for role in response["semantic_roles"]:
        max_rel = 0
        max_ent = []
        if 'keywords' in role['subject']:
            for ent in role['subject']['keywords']:
                ent_text = ent['text'].lower()
                for dict_key in keylist_rel:
                    if ent_text in dict_key.lower():
                        ent_rel = keylist_rel[dict_key]
                        if ent_rel > max_rel:
                            max_rel = ent_rel
                            max_ent = ent
                if max_rel != 0 and max_ent:
                    sub = max_ent['text']
                    G.add_node(sub)

        max_rel = 0
        max_ent = []
        if 'object' in role and 'keywords' in role['object']:
            for obj in role['object']['keywords']:
                obj_text = obj['text'].lower()
                for dict_key in keylist_rel:
                    if obj_text in dict_key.lower():
                        obj_rel = keylist_rel[dict_key]
                        if obj_rel > max_rel:
                            max_rel = obj_rel
                            max_ent = obj
                if max_rel != 0 and max_ent:
                    obj = max_ent['text']
                    G.add_node(obj)
                    if sub and obj and sub != obj:
                        G.add_edge(sub, obj)

    return nx.closeness_centrality(G), key_sent, key_emotion
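A usage sketch with a sentence borrowed from Example #12; the first return value is the networkx closeness-centrality dict, so the most central keywords can be ranked directly:

# Usage sketch: rank graph nodes by closeness centrality.
centrality, key_sent, key_emotion = compute_all_features(
    'Steve Jobs is the founder of Apple')
for node, score in sorted(centrality.items(), key=lambda kv: -kv[1]):
    print(node, round(score, 3))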
Example #10
def test_semantic_roles(self):
    s = Features(semantic_roles=SemanticRolesOptions())
    assert s._to_dict() == {'semantic_roles': {}}
Example #11
import json
from ibm_watson import NaturalLanguageUnderstandingV1
from ibm_watson.natural_language_understanding_v1 import Features, SemanticRolesOptions

natural_language_understanding = NaturalLanguageUnderstandingV1(
    version='2019-07-12',
    iam_apikey='1erf8hkp6tJpTAVzh8_F7HmyRTWG6Rn9A567j5lyAcaM',
    url='https://gateway.watsonplatform.net/natural-language-understanding/api'
)

response = natural_language_understanding.analyze(
    text=
    'nothing happen really making together looking around trying you.\\n talking friends anything coming him.\\n probably everything too.\\n really excuse rachel alright forget actually something sounds course okay.\\n started wanted little thought this.\\n supposed okay.\\n getting sorry.\\n happened laughing apartment starts matter listen monica know.\\n chandler right.\\n yeah.\\n birthday pheebs please people everybody phoebe crying amazing anyway believe person picture stupid already thanks thinking remember almost married somebody someone here.\\n phoebe better another telling points out.\\n that.\\n pretty minute things beautiful laughs always enough couple wedding coffee yknow everyone saying thank totally oh.\\n taking second father seeing though entering called tomorrow office leaves bedroom friend monica listens joshua',
    features=Features(semantic_roles=SemanticRolesOptions())).get_result()

print(json.dumps(response, indent=2))
Example #12
# Assumes the client from Example #11 and the full set of option imports:
from ibm_watson.natural_language_understanding_v1 import (
    Features, ConceptsOptions, EmotionOptions, EntitiesOptions,
    RelationsOptions, SemanticRolesOptions, SentimentOptions)

response = natural_language_understanding.analyze(
    text='Who is the president of Brazil?',
    features=Features(
        concepts=ConceptsOptions(),
        emotion=EmotionOptions(),
        entities=EntitiesOptions(),
        sentiment=SentimentOptions(),
    )).get_result()

print(json.dumps(response, indent=2))

response = natural_language_understanding.analyze(
    text='Steve Jobs is the founder of Apple',
    features=Features(
        entities=EntitiesOptions(),
        semantic_roles=SemanticRolesOptions(),
    )).get_result()

print(json.dumps(response, indent=2))

response = natural_language_understanding.analyze(
    # Portuguese sample text: 'At FIAP, the students are very dedicated.'
    text='Na FIAP, os alunos são muito dedicados.',
    features=Features(
        relations=RelationsOptions(),
        concepts=ConceptsOptions(),
        emotion=EmotionOptions(),
        entities=EntitiesOptions(),
        semantic_roles=SemanticRolesOptions(),
        sentiment=SentimentOptions(),
    )).get_result()
Example #13
import json
from ibm_watson import NaturalLanguageUnderstandingV1
from ibm_watson.natural_language_understanding_v1 \
    import Features, EntitiesOptions, KeywordsOptions, SemanticRolesOptions, SyntaxOptions

natural_language_understanding = NaturalLanguageUnderstandingV1(
    version='2018-11-16',
    ## url is optional, and defaults to the URL below. Use the correct URL for your region.
    url='https://gateway.watsonplatform.net/natural-language-understanding/api',
    # Authentication for my server in the cloud
    iam_apikey='rA-eX7MNEepZZ8Vi1gz463UVp2aOm10WQFRYCQtfdehJ')

response = natural_language_understanding.analyze(
    # The text you want to submit
    text='Hey Doris, can you pick an coke for me ?',
    # All the features I want to use
    features=Features(entities=EntitiesOptions(emotion=False,
                                               sentiment=False,
                                               limit=2),
                      keywords=KeywordsOptions(emotion=False,
                                               sentiment=False,
                                               limit=2),
                      semantic_roles=SemanticRolesOptions(keywords=True,
                                                          entities=True,
                                                          limit=2),
                      syntax=SyntaxOptions())).get_result()

print(json.dumps(response, indent=2))
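A follow-up sketch: semantic_roles was requested with keywords=True and entities=True, so each role may carry nested 'keywords' and 'entities' lists alongside the usual text fields; .get() guards against roles that lack a part:

# Follow-up sketch: walk the roles returned by the request above.
for role in response.get('semantic_roles', []):
    print(role.get('subject', {}).get('text'),
          '--', role.get('action', {}).get('text'),
          '->', role.get('object', {}).get('text'))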
Example #14
def get_actions_related_to_entities():
    """ 
    FUNCTION: Perform semantic roles (POS tagging or identification of action, subject, object) in titles of articles to categorize action 
    entities with products/general entities they describe (and vice versa - match product/general entities with action entities that describe them)
        Represented by 2 dictionaries:
        - ACTS TO ENTS: which match actions to product/general entities 
                [action: [all event entities in sentence that action describes]]
        - ENTS TO ACTS: match product/general entities to actions that dsscribe them 
    
    STRUCTURE:
    PART 1:First calls IBM Watson semantic roles function to identify action, subject, object in each sentence, 
        but if IBM Watson is unable to perform function, go to PART 2

    PART 2: perform custom POS tagging engine 
        - first analyzes if there is (action + noun) bigrams sentence that does not contain ('or' + action) 
          because that indicates the verb only describes noun directly following it 

            e.g., DOES NOT CONTAIN 'OR': 'Create user accounts and set permissions' -- 'create' only describes 'user accounts' and 'set' only describes 'permissions'
            e.g., CONTAINS 'OR': 'Create, suspend, or cancel customer subscriptions' -- 'create', 'suspend', 'cancel' ALL describe 'customer subscriptions'
        
        - keeps track of the indices of the (action + noun) bigrams 
        - split the POS tag list by the indices and perform analysis within each sublist 
        - concatenates all the action and event entity words by "~" character
        - splits actions by "~" and matches event entities to each individual action 
            ***at the moment, only doing this for actions because usually actions are just one word, so each token in the actions list 
               concatenated by "~" can be assumed to be a different action whereas event entities are often more than one word, currently not 
               enough info on how to split event entities concatenated by "~"
    """

    actions_to_entities_dict = {}
    entities_to_actions_dict = {}

    #print sentences
    for i in read_output:
        print(i[2])

    for i in read_output:
        url_link = i[0]
        title = i[2]
        content = i[3]
        content_sentences = i[3].split('.')
        title_tokens = title.split(' ')

        response = natural_language_understanding.analyze(
            text=title,
            features=Features(
                semantic_roles=SemanticRolesOptions())).get_result()
        json_dict = json.loads(
            json.dumps(response))  #turns json string output into dictionary

        #extract json values
        values_to_actions = []  # [event entities]
        values_to_entities = []  #[action entities]
        subject = ''
        action = ''
        object_string = ''
        entity = ''
        sentence = title

        #-----PART 1: PERFORM IBM WATSON SEMANTIC ROLES----

        if json_dict['semantic_roles'] != []:
            print('ibm watson')
            sentence = json_dict['semantic_roles'][0]['sentence']
            if json_dict['semantic_roles'][0]['subject']['text'] is not None:
                subject = json_dict['semantic_roles'][0]['subject']['text']
                print(subject)
            if json_dict['semantic_roles'][0]['action']['text'] is not None:
                action = json_dict['semantic_roles'][0]['action']['text']
                print(action)
            if json_dict['semantic_roles'][0]['object']['text'] is not None:
                object_string = json_dict['semantic_roles'][0]['object']['text']
                print(object_string)

            entity = entity + '~ ' + subject
            values_to_actions.append(entity)
            values_to_entities.append(action)
            # Map each action/verb to the relevant entities; categorize entities under each action.
            check_if_action_key_exists(action, actions_to_entities_dict,
                                       values_to_actions)
            check_if_entity_key_exists(entity, entities_to_actions_dict,
                                       values_to_entities)

        # ----- PART 2: PERFORM CUSTOM POS TAGGING IF WATSON FAILS ------
        else:
            tokens = nltk.word_tokenize(title)
            POS_tags = nltk.pos_tag(tokens)
            print(POS_tags)
            action_noun_template_count = 0
            bigram_dict = {}
            bigrams = []  # keep track of bigrams to compare verb + noun templates
            contains_or_conjunction = False
            index_list = [0]  # indices at which the POS tag list will be split

            #populate bigrams based on criteria described above
            for i, val in enumerate(POS_tags):
                if (i == len(tokens) - 1):
                    bigrams.append([val[1], ''])
                else:
                    bigrams.append([val[1], POS_tags[i + 1][1]])
                    if (val[0].lower() == 'or' and
                        (POS_tags[i + 1][1]
                         in ('VB', 'VBD', 'VBG', 'VBN', 'VBP', 'VBZ') or
                         POS_tags[i + 1][1] in action_entity_output_string)):
                        contains_or_conjunction = True

            if not contains_or_conjunction:
                # i is the index of the POS tag in the sentence.
                for i, val in enumerate(bigrams):
                    first = val[0]
                    second = val[1]
                    if ((first in ('VB', 'VBD', 'VBG', 'VBN', 'VBP')
                         or first in action_entity_output_string)
                            and second in ('NN', 'NNP', 'NNPS', 'NNS', 'CD')):
                        if i != 0:
                            index_list.append(i)
            index_list.append(len(tokens) - 1)
            #go through POS_tags based on split indices to populate the dictionaries
            count = 0
            for i, index in enumerate(index_list):
                if (i < len(index_list) - 1):
                    if (len(index_list) > 2):
                        action = ''
                        entity = ''
                        values_to_actions = []
                        values_to_entities = []
                    for (word, tag) in POS_tags[index_list[i]:index_list[i + 1]]:
                        if word.lower() in action_entity_output_string:
                            if word == tokens[0]:
                                # If the first word of the sentence is in the action library, it is
                                # highly likely an action (nltk sometimes tags verbs as nouns).
                                action = action + '~ ' + word
                            elif tag not in ('NNP', 'NNPS', 'NNS', 'NN', 'CD'):
                                # Needs a further check because the POS tag isn't always accurate.
                                action = action + '~ ' + word
                        elif (tag in ('NNP', 'NNPS', 'NNS', 'NN', 'CD') and
                              word.lower() not in action_entity_output_string):
                            # Could be more than one product/general entity, or the entity is a
                            # phrase rather than a single word.
                            entity = entity + '~ ' + word
                    action = action.strip('~')
                    print(action)
                    entity = entity.strip('~')
                    if '~' in action:
                        # Handles sentences in which multiple verbs are identified and we want to
                        # map each of them to the same noun entities, e.g., for "Create, suspend,
                        # or cancel customer subscriptions - Partner Center" we want
                        # 'Create'/'suspend'/'cancel' each mapped to [customer subscriptions, Partner Center].
                        action_list = action.split('~')
                        for action_item in action_list:
                            values_to_entities.append(action_item)
                        values_to_actions.append(entity)
                        if (entity != ''):
                            check_if_entity_key_exists(
                                entity, entities_to_actions_dict,
                                values_to_entities)
                        for action_item in action_list:
                            if (action_item != ''):  #see helper function
                                check_if_action_key_exists(
                                    action_item, actions_to_entities_dict,
                                    values_to_actions)

                    else:
                        values_to_entities.append(action)
                        values_to_actions.append(entity)
                        if action != '':
                            check_if_action_key_exists(
                                action, actions_to_entities_dict,
                                values_to_actions)
                        if entity != '':
                            check_if_entity_key_exists(
                                entity, entities_to_actions_dict,
                                values_to_entities)
    print('---- OUTPUT ----')
    print('ACTIONS MAPPED TO GEN/PROD ENTITIES')
    print(actions_to_entities_dict)
    Tools.write_csv_data('./watson_output_actionsMap.csv',
                         actions_to_entities_dict)
    print('GEN/PROD ENTITIES MAPPED TO ACTIONS')
    print(entities_to_actions_dict)
    Tools.write_csv_data('./watson_output_entitiesMap.csv',
                         entities_to_actions_dict)
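As referenced in the docstring of Example #14, a standalone sketch of the PART 2 bigram heuristic on the docstring's own two example sentences; the verb tag set mirrors the one the function uses, and nltk's tokenizer and tagger models are assumed to be downloaded:

# Standalone sketch of the ('or' + verb) check that decides whether every
# verb in a title shares the same noun entities.
import nltk

for title in ['Create user accounts and set permissions',
              'Create, suspend, or cancel customer subscriptions']:
    tags = nltk.pos_tag(nltk.word_tokenize(title))
    verb_tags = ('VB', 'VBD', 'VBG', 'VBN', 'VBP', 'VBZ')
    has_or_verb = any(word.lower() == 'or' and tags[i + 1][1] in verb_tags
                      for i, (word, _) in enumerate(tags[:-1]))
    print(title, '->', 'verbs share nouns' if has_or_verb else 'verb+noun pairs')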