Example #1
    def on_data(self, data):
        try:
            tweet = json.loads(data)
            textdata = tweet['text']
            if tweet['coordinates']:
                print(tweet['coordinates'])

            # Publish the tweet text to Kafka. Note that creating a new
            # producer and consumer on every callback is expensive.
            producer = KafkaProducer(
                value_serializer=lambda v: json.dumps(v).encode('utf-8'))
            producer.send('fizzbuzz', textdata)

            consumer = KafkaConsumer(bootstrap_servers='localhost:9092',
                                     auto_offset_reset='earliest')
            consumer.subscribe(['fizzbuzz'])
            # Without consumer_timeout_ms this iteration blocks forever,
            # so the stream never processes another tweet.
            for message in consumer:
                print(message)

            # Sentiment analysis of the tweet text with Watson NLU.
            nlu = watson_developer_cloud.NaturalLanguageUnderstandingV1(
                version='2017-02-27', username='', password='')
            nlu.analyze(text=textdata, features=[features.Sentiment()])

        except Exception as e:
            # print("exception: %s" % e)
            pass

    def on_error(self, status):
        print(status)
        return True
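# A minimal wiring sketch for the listener above (an assumption, not shown in
# the original), using tweepy 3.x and a local Kafka broker; the track term and
# credential placeholders are hypothetical.
import json

import tweepy
from kafka import KafkaProducer

producer = KafkaProducer(
    bootstrap_servers='localhost:9092',
    value_serializer=lambda v: json.dumps(v).encode('utf-8'))

class TweetListener(tweepy.StreamListener):
    # Create the producer once, not per callback, and return True so
    # the stream keeps running.
    def on_data(self, data):
        tweet = json.loads(data)
        producer.send('fizzbuzz', tweet.get('text', ''))
        return True

    def on_error(self, status):
        print(status)
        return status != 420  # stop on rate limiting

auth = tweepy.OAuthHandler('consumer-key', 'consumer-secret')
auth.set_access_token('access-token', 'access-secret')
tweepy.Stream(auth, TweetListener()).filter(track=['watson'])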
Example #2
def run_watson_nlu():
    files = glob.glob('work/bug-*.json')
    (user, passwd) = get_auth()
    for fname in files:
        with open(fname) as f:
            LOG.debug("Processing %s" % fname)
            bug = json.loads(f.read())
            num = bug["link"].split("/")[-1]
            with open("work/res-%s.json" % num, "w") as out:
                nlu = watson_developer_cloud.NaturalLanguageUnderstandingV1(
                    version='2017-02-27', username=user, password=passwd)
                res = nlu.analyze(text=bug["comments"],
                                  features=[
                                      features.Concepts(),
                                      features.Keywords(),
                                      features.Emotion(),
                                      features.Sentiment(),
                                  ])
                output = {
                    "link": bug["link"],
                    "tags": bug["tags"],
                    "importance": bug["importance"],
                    "length": len(bug["comments"]),
                    "results": res
                }
                out.write(json.dumps(output, indent=4))
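# The snippet calls get_auth() without showing it; a plausible sketch (an
# assumption, not the original helper) that reads the Bluemix credentials
# from environment variables:
import os

def get_auth():
    return os.environ['NLU_USERNAME'], os.environ['NLU_PASSWORD']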
Example #3
def sentiment(tips):
    # Helper function: return the document-level sentiment score for `tips`.
    # Load Watson credentials from the environment.
    username = os.environ.get('NLU_USERNAME')
    password = os.environ.get('NLU_PASSWORD')
    nlu = watson_developer_cloud.NaturalLanguageUnderstandingV1(version='2017-02-27',
        username=username, password=password)
    output = nlu.analyze(text=tips, features=[features.Sentiment()])
    return output['sentiment']['document']['score']
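# Hypothetical usage of the helper above, assuming NLU_USERNAME and
# NLU_PASSWORD are exported in the environment:
score = sentiment("The service was quick and the staff were friendly.")
print(score)  # document-level sentiment score, roughly in [-1.0, 1.0]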
Example #4
def extract_data(text):
    # Use Watson's NLU API to extract the keywords, entities and concepts from a text
    bm_username = "******"
    bm_password = "******"

    nlu = watson_developer_cloud.NaturalLanguageUnderstandingV1(
        version='2017-02-27', username=bm_username, password=bm_password)
    ents = nlu.analyze(text=text,
                       features=[
                           features.Entities(),
                           features.Keywords(),
                           features.Concepts()
                       ])

    ents["tweet"] = text
    return ents
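# Hypothetical call. In the pre-1.0 SDK, analyze() returns a plain dict,
# which is why the snippet can attach a "tweet" key to the result.
data = extract_data("IBM opened a new research lab in Nairobi.")
print([e['text'] for e in data['entities']])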
Example #5
def nlp_parse(usr, password):
    try:
        return watson_developer_cloud.NaturalLanguageUnderstandingV1(
            version='2017-02-27', username=usr, password=password)
    except Exception:
        return []
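# A slightly safer variant (a sketch, not the original): catch only the
# SDK's exception type and return None so callers can tell failure apart
# from a normal result.
from watson_developer_cloud import WatsonException

def nlp_parse_safe(usr, password):
    try:
        return watson_developer_cloud.NaturalLanguageUnderstandingV1(
            version='2017-02-27', username=usr, password=password)
    except WatsonException:
        return None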
Example #6
# coding: utf-8

# In[4]:

import sys
import os
import json
sys.path.append(os.path.join(os.getcwd(), '..'))
import watson_developer_cloud
import watson_developer_cloud.natural_language_understanding.features.v1 as \
    features

# In[3]:

nlu = watson_developer_cloud.NaturalLanguageUnderstandingV1(
    version='2017-02-27',
    username='******',
    password='******')

# In[6]:

response = nlu.analyze(
    text='Bruce Banner is the Hulk and Bruce Wayne is BATMAN! '
    'Superman fears not Banner, but Wayne.',
    features=[features.Entities(), features.Keywords()])

print(json.dumps(response, indent=2))

# In[ ]:
Example #7

# Imports implied by the snippet (not shown in the original excerpt):
import pandas as pd

import watson_developer_cloud

# Watson Visual Recognition
# VISUAL_RECOGNITION_API_KEY = '<add_vr_api_key>'

# Watson Natural Language Understanding (NLU)
NATURAL_LANGUAGE_UNDERSTANDING_USERNAME = '******'
NATURAL_LANGUAGE_UNDERSTANDING_PASSWORD = '******'

# Watson Tone Analyzer
TONE_ANALYZER_USERNAME = '******'
TONE_ANALYZER_PASSWORD = '******'

# Create the Watson clients

nlu = watson_developer_cloud.NaturalLanguageUnderstandingV1(
    version='2017-02-27',
    username=NATURAL_LANGUAGE_UNDERSTANDING_USERNAME,
    password=NATURAL_LANGUAGE_UNDERSTANDING_PASSWORD)
# tone_analyzer = ToneAnalyzerV3(version='2016-05-19',
#                                username=TONE_ANALYZER_USERNAME,
#                                password=TONE_ANALYZER_PASSWORD)

# visual_recognition = VisualRecognitionV3(version='2018-05-22', iam_apikey=VISUAL_RECOGNITION_API_KEY)

# **Insert to code > Insert pandas DataFrame**
# df_data_1 = pd.read_csv('C:\\Users\\Rafael\\Downloads\\us-consumer-finance-complaint-database\\consumer_complaints_only_with_narrative_100.csv', encoding='latin-1')#, low_memory=False)
df_data_1 = pd.read_csv(
    'C:\\Users\\Rafael\\Downloads\\us-consumer-finance-complaint-database\\consumer_complaints_only_with_narrative_100.csv',
    sep=',',
    error_bad_lines=False,
    index_col=False,
    dtype="unicode")
Example #8
# coding: utf-8

# In[4]:

import sys
import os
sys.path.append(os.path.join(os.getcwd(), '..'))
import watson_developer_cloud
import watson_developer_cloud.natural_language_understanding.features.v1 as \
    features

# In[3]:

nlu = watson_developer_cloud.NaturalLanguageUnderstandingV1(
    version='2016-01-23', username='******', password='******')

# In[6]:

nlu.analyze(text='this is my experimental text.  Bruce Banner is the Hulk'
            ' and Bruce Wayne is BATMAN! Superman fears not Banner, '
            'but Wayne.',
            features=[features.Entities(),
                      features.Keywords()])

# In[ ]:
Example #9
import sys
import os
import csv
import codecs

sys.path.append(os.path.join(os.getcwd(), '..'))
import watson_developer_cloud
import watson_developer_cloud.natural_language_understanding.features.v1 as features

nlu = watson_developer_cloud.NaturalLanguageUnderstandingV1(
    version='2017-02-27',
    username='******',
    password='******')

Channels = [
    "cityworld", "cnn", "dailytime", "empire", "foreignpolicy", "fox",
    "goneleft", "mutiny", "nbc", "news70", "newspolitics", "nytimes",
    "realnews", "truth", "usgreat", "webdaily", "whitehouse"
]

# Two output files, one for title sentiments and one for content sentiments.
Analyze_Files = ["SentimentTitle.csv", "SentimentContent.csv"]

#for channel in Channels:
# Columns 1 and 3 of FullData.csv hold the Title and the Content; toggle
# `index` between them.
#index = 1
index = 3
for value in Analyze_Files:
    # For each channel's FullData.csv, analyze either the title or the content.
    with codecs.open("CleanData" + value, 'w', "utf-8-sig") as outputFile:
        pass  # loop body truncated in the original snippet
Example #10
                end = -1
                while j < len(results):
                    if (results[j] == '"' and start == -1):
                        start = j
                    elif (results[j] == '"' and end == -1):
                        end = j
                        classes += [results[start + 1:end]]
                        break
                    j += 1
                i = results.find(label, i + 1)

            # print(classes)
            s = ""
            for k in classes:
                nlu = watson_developer_cloud.NaturalLanguageUnderstandingV1(
                    version='2017-02-27',
                    username='******',
                    password='******')
                h = nlu.analyze(text=k, features=[features.Entities()])

            names = []
            if "person" in classes:
                face_results = None
                with open('stillframe.jpg', 'rb') as images_file2:
                    json_obj2 = visual_recognition.detect_faces(
                        images_file=images_file2)
                    face_results = str(json.dumps(json_obj2, indent=2))

                    name = '"name"'
                    i2 = results.find(name)
                    while i2 != -1:
                        j2 = i2 + len(name)
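# The quote-scanning above extracts JSON string values by hand; a sketch of
# the same "class" extraction with the json module, assuming `results` holds
# a Visual Recognition classify() response:
import json

parsed = json.loads(results)
classes = [c['class']
           for image in parsed.get('images', [])
           for clf in image.get('classifiers', [])
           for c in clf.get('classes', [])]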
Example #11
    def identify_artist(self):
        # os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = os.path.abspath(os.getcwd() + cr.GOOGLE_APP_NLP_CRED_PATH)
        # print(os.environ['GOOGLE_APPLICATION_CREDENTIALS'])

        # Using Google Cloud API
        # from googleapiclient import discovery
        # import httplib2
        # from oauth2client.client import GoogleCredentials
        #
        # DISCOVERY_URL = cr.GOOGLE_APP_NLP_DISCOVERY_URL
        #
        # http = httplib2.Http()
        #
        # credentials =
        # GoogleCredentials.get_application_default().create_scoped(['https://www.googleapis.com/auth/cloud-platform'])
        # credentials.authorize(http)
        #
        # service = discovery.build('language', 'v1beta2', http=http, discoveryServiceUrl=DISCOVERY_URL)
        #
        # service_request = service.documents().annotateText(
        #     body={
        #         'document': {
        #             'type': 'PLAIN_TEXT',
        #             'content': speech
        #         },
        #         'features': {
        #             'extractSyntax': False,
        #             'extractEntities': True,
        #             'extractDocumentSentiment': False
        #         }
        #     })
        #
        # response = service_request.execute()

        # Using IBM Watson Natural Language Understanding API

        nlu = watson_developer_cloud.NaturalLanguageUnderstandingV1(version=self.WDC_NLUV1_VERSION,
                                                                    username=self.WDC_NLUV1_USERNAME,
                                                                    password=self.WDC_NLUV1_PWD)
        ret = nlu.analyze(text=self.speech, features=[features.Entities(), features.Keywords()])

        pprint(ret)

        # Going forward with IBM Watson APIs
        keywords = ret['keywords']
        key = []
        for elem in keywords:
            key.append(elem['text'])

        # If Play is present in any of Keywords:
        # If yes, check if any Artist or Music group present in other Keywords.
        # If yes, fetch other keywords, and use that as text to search for song of particular artist!
        play_action = False

        for elem in key:
            if 'play' in elem.lower():
                play_action = True
        if not play_action:
            if 'play' in self.speech.lower():
                play_action = True

        # If Play_Action is True, we need to call search_video function
        # Initialize outside the branch so the return below cannot raise
        # UnboundLocalError when play_action is False.
        search_query = ''
        if play_action:

            entities = ret['entities']
            entities = entities[0]

            ty = entities['type']
            text = entities['text']

            if ty == 'MusicGroup' or ty == 'Person' or ty == 'Company':
                # Means it corresponds to particular artist of Musical Band!
                search_query += text + ' '
                for elem in key:
                    if 'play' not in elem.lower():
                        if text not in elem.lower():
                            search_query += elem + ' '
            print(search_query)

        return search_query
Example #12
import sys
import os

sys.path.append(os.path.join(os.getcwd(), '..'))
import watson_developer_cloud
import watson_developer_cloud.natural_language_understanding.features.v1 as features

nlu = watson_developer_cloud.NaturalLanguageUnderstandingV1(
    version='2017-02-27', username='******', password='******')
h = nlu.analyze(text='apple',
                features=[features.Entities(),
                          features.Keywords()])
print(h)
Example #13
import sys
import os
import json
sys.path.append(os.path.join(os.getcwd(), '..'))
import watson_developer_cloud
import watson_developer_cloud.natural_language_understanding.features.v1 as features

nlu = watson_developer_cloud.NaturalLanguageUnderstandingV1(
    version='2017-02-27',
    username='******',
    password='******')
print(dir(features))
response = nlu.analyze(text='what is number of open tickets ?',
                       features=[features.Relations(),
                                 features.Keywords()])

print(json.dumps(response, indent=2))
Example #14
import cgi
import sys
import boto.sqs
from boto.sqs.message import Message
import time
from multiprocessing import *
import threading
import multiprocessing
import boto.sns
import ast

from requests_aws4auth import AWS4Auth  # needed by the AWS4Auth call below

import watson_developer_cloud
import watson_developer_cloud.natural_language_understanding.features.v1 as features

nlu = watson_developer_cloud.NaturalLanguageUnderstandingV1(
    version='2017-02-27',
    username='******',
    password='******')

# Credentials redacted; supply real keys via your AWS configuration.
conn = boto.sqs.connect_to_region(
    "us-west-2",
    aws_access_key_id='******',
    aws_secret_access_key='******')
q = conn.create_queue('tweets-queue')

host = 'search-twittmap-mtvfhojv7a2yfejtlkpqrvetcy.us-west-2.es.amazonaws.com'
awsauth = AWS4Auth('******', '******', 'us-west-2', 'es')
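# The snippet stops before any processing; one way (an assumption, not the
# original code) to drain the queue and score each tweet with NLU:
while True:
    for m in q.get_messages(num_messages=10, wait_time_seconds=5):
        result = nlu.analyze(text=m.get_body(),
                             features=[features.Sentiment()])
        print(result['sentiment']['document']['score'])
        q.delete_message(m)
    time.sleep(1)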
Example #15
    def __init__(self, username, password):
        self.nlu = watson_developer_cloud.NaturalLanguageUnderstandingV1(
            version='2017-02-27', username=username, password=password)
        self.features = []
        self.queries = []
        self.documents = []
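# The snippet above is a method fragment; a self-contained sketch of a
# possible enclosing class (the name NLUWrapper is an assumption):
import os

import watson_developer_cloud

class NLUWrapper(object):
    def __init__(self, username, password):
        self.nlu = watson_developer_cloud.NaturalLanguageUnderstandingV1(
            version='2017-02-27', username=username, password=password)
        self.features = []
        self.queries = []
        self.documents = []

wrapper = NLUWrapper(os.environ['NLU_USERNAME'], os.environ['NLU_PASSWORD'])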
Example #16
# Imports implied by the snippet (not shown in the original excerpt):
import datetime
import re

import watson_developer_cloud
import watson_developer_cloud.natural_language_understanding.features.v1 as Features
# Assumed import; the original does not show where GoogleMaps comes from
# (a legacy `googlemaps` package is one possibility):
from googlemaps import GoogleMaps

def analysis(text):
    natural_language_understanding = watson_developer_cloud.NaturalLanguageUnderstandingV1(
        username="", password="", version="2017-09-22")

    # text = "IBM (http://www.ibm.com) is an American multinational technology company headquartered in Armonk, New York, United States, with operations in over 170 countries. It will hold a Career Fair on tomorrow from 2:00AM to 3:00PM"
    # text = "We are pleased to announce that we are now accepting poster proposals for consideration to the Data Science Conference (www.dsconference.rice.edu) The poster session, to be held on tomorrow 16:00 - 18:00 p.m., is a unique networking opportunity for students. This will take place at RMC, Rice University, Houston, Texas."
    # text = "9/23/2017 is the third installment of Houston Food Tour! Catch a ride to for a delicious meal, an opportunity to get some shopping done, and as always, fun with friends from America and around the world! Someone from Korea will share a little about Korean cuisine! We will meet at China Town, Houston. The activity will start at 7:00PM and will go back before 9:00PM."

    response = natural_language_understanding.analyze(
        text=text,
        features=[
            Features.Keywords(sentiment=True, limit=5
                              # emotion=True,
                              ),
            Features.Entities(sentiment=True,
                              # limit=5
                              )
        ])

    today = datetime.date.today()
    year = int(today.year)
    month = int(today.month)
    day = int(today.day)

    # 2017-9-23
    # 9/23/2017
    # tomorrow
    regexDate = re.findall(r'\d+\S\d+\S\d+', text)
    if (len(regexDate)):
        date = regexDate[0]
        date1 = date.split('-')
        date2 = date.split('/')
        if (len(date1) != 1):
            year = int(date1[0])
            month = int(date1[1])
            day = int(date1[2])
        if (len(date2) != 1):
            year = int(date2[2])
            month = int(date2[0])
            day = int(date2[1])
    else:
        regexDate = re.findall(r'tomorrow', text)
        if (len(regexDate)):
            day += 1

    # 2:00 - 3:00
    # 2:00AM - 3:00PM
    regexDate = re.findall(r'\d:\d\dAM|\d\d:\d\dAM|\d:\d\dPM|\d\d:\d\dPM',
                           text)
    if (len(regexDate) == 2):
        startTime = regexDate[0]
        endTime = regexDate[1]
        time1 = startTime.split(':')
        time2 = endTime.split(':')
        if time1[1][-2:] == "PM":
            startHour = int(time1[0]) + 12
        else:
            startHour = int(time1[0])
        startMin = int(time1[1][:-2])
        if time2[1][-2:] == "PM":
            endHour = int(time2[0]) + 12
        else:
            endHour = int(time2[0])
        endMin = int(time2[1][:-2])
    else:
        regexDate = re.findall(r'\d:\d\d|\d\d:\d\d', text)
        if (len(regexDate) == 2):
            startTime = regexDate[0]
            endTime = regexDate[1]
            time1 = startTime.split(':')
            time2 = endTime.split(':')
            startHour = int(time1[0])
            startMin = int(time1[1])
            endHour = int(time2[0])
            endMin = int(time2[1])

    # keywords, location
    # print(json.dumps(response, indent=2))
    entities = response['entities']
    keywords = response['keywords']
    keywordsList = [keyword['text'] for keyword in keywords]
    positionList = []
    for entity in entities:
        if entity['type'] == "Location":
            positionList.append(entity['text'])
        elif 'disambiguation' in entity:
            if "Location" in entity['disambiguation']['subtype']:
                positionList.append(entity['text'])

    gmaps = GoogleMaps("******")  # API key redacted
    # address = 'Constitution Ave NW & 10th St NW, Washington, DC'

    date = ""
    date += str(year)
    if month < 10:
        date += ("-0" + str(month))
    else:
        date += ("-" + str(month))
    if day < 10:
        date += ("-0" + str(day))
    else:
        date += ("-" + str(day))
    startDateTime = date + " "
    endDateTime = date + " "
    if startHour < 10:
        startDateTime += "0" + str(startHour)
    else:
        startDateTime += str(startHour)
    if startMin < 10:
        startDateTime += ":0" + str(startMin)
    else:
        startDateTime += ":" + str(startMin)
    startDateTime += ":00"
    if endHour < 10:
        endDateTime += "0" + str(endHour)
    else:
        endDateTime += str(endHour)
    if endMin < 10:
        endDateTime += ":0" + str(endMin)
    else:
        endDateTime += ":" + str(endMin)
    endDateTime += ":00"

    address = ""
    for i in range(len(positionList)):
        address += positionList[i] + " "
    address = address.rstrip()

    geoLocation = gmaps.geocode(address)[0]['geometry']['location']
    lat = geoLocation['lat']
    lng = geoLocation['lng']
    gps = str(lat) + " " + str(lng)

    url = ""
    r = re.compile(
        r'(?i)\b((?:[a-z][\w-]+:(?:/{1,3}|[a-z0-9%])|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:\'".,<>?«»“”‘’]))'
    )
    url_list = r.findall(text)
    if (len(url_list) != 0):
        url = url_list[0][0]

    return startDateTime, endDateTime, address, gps, text, url, keywordsList
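# Hypothetical call with one of the commented-out sample texts above; the
# geocoding step needs a valid Google Maps key and network access.
start, end, address, gps, text, url, keywords = analysis(
    "We will meet at China Town, Houston. The activity will start at "
    "7:00PM and will go back before 9:00PM.")
print(start, end, address, url)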