def on_data(self, data):
    try:
        tweet = json.loads(data)
        textdata = tweet['text']
        if tweet['coordinates']:
            print(tweet['coordinates'])
        # Publish the tweet text to a Kafka topic ...
        producer = KafkaProducer(
            value_serializer=lambda v: json.dumps(v).encode('utf-8'))
        producer.send('fizzbuzz', textdata)
        # ... and read it back from the same topic.
        consumer = KafkaConsumer(bootstrap_servers='localhost:9092',
                                 auto_offset_reset='earliest')
        consumer.subscribe(['fizzbuzz'])
        for message in consumer:
            print(message)
        # Run Watson NLU sentiment analysis on the tweet text.
        nlu = watson_developer_cloud.NaturalLanguageUnderstandingV1(
            version='2017-02-27', username='', password='')
        nlu.analyze(text=textdata, features=[features.Sentiment()])
    except Exception:
        pass

def on_error(self, status):
    print(status)
    return True
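# The on_data/on_error handlers above are written as methods of a
# tweepy StreamListener. A minimal wiring sketch, assuming tweepy's
# pre-4.0 API; the class name TweetListener and all credentials are
# placeholders, not from the source:
import tweepy

class TweetListener(tweepy.StreamListener):
    on_data = on_data      # the handlers defined above
    on_error = on_error

auth = tweepy.OAuthHandler('<consumer_key>', '<consumer_secret>')
auth.set_access_token('<access_token>', '<access_token_secret>')
stream = tweepy.Stream(auth, TweetListener())
stream.filter(track=['python'])  # stream tweets matching a keyword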
def run_watson_nlu():
    files = glob.glob('work/bug-*.json')
    (user, passwd) = get_auth()
    for fname in files:
        with open(fname) as f:
            LOG.debug("Processing %s" % fname)
            bug = json.loads(f.read())
            num = bug["link"].split("/")[-1]
            with open("work/res-%s.json" % num, "w") as out:
                nlu = watson_developer_cloud.NaturalLanguageUnderstandingV1(
                    version='2017-02-27', username=user, password=passwd)
                res = nlu.analyze(text=bug["comments"], features=[
                    features.Concepts(),
                    features.Keywords(),
                    features.Emotion(),
                    features.Sentiment(),
                ])
                output = {
                    "link": bug["link"],
                    "tags": bug["tags"],
                    "importance": bug["importance"],
                    "length": len(bug["comments"]),
                    "results": res
                }
                out.write(json.dumps(output, indent=4))
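# `get_auth()` above is assumed to return a (username, password) pair.
# A minimal sketch reading the Watson NLU credentials from environment
# variables (the variable names are an assumption, not from the source):
import os

def get_auth():
    return (os.environ['NLU_USERNAME'], os.environ['NLU_PASSWORD'])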
def sentiment(tips):
    # Helper function to return a text sentiment score.
    # Load Watson credentials from the environment.
    username = os.environ.get('NLU_USERNAME')
    password = os.environ.get('NLU_PASSWORD')
    nlu = watson_developer_cloud.NaturalLanguageUnderstandingV1(
        version='2017-02-27', username=username, password=password)
    output = nlu.analyze(text=tips, features=[features.Sentiment()])
    return output['sentiment']['document']['score']
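# Usage sketch for sentiment(), assuming NLU_USERNAME/NLU_PASSWORD are
# set in the environment; the review text is a made-up example:
score = sentiment("The food was great but the wait was far too long.")
print(score)  # document sentiment score, roughly in [-1.0, 1.0]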
def extract_data(text):
    # Use Watson's NLU API to extract the keywords, entities and
    # concepts from a text.
    bm_username = "******"
    bm_password = "******"
    nlu = watson_developer_cloud.NaturalLanguageUnderstandingV1(
        version='2017-02-27', username=bm_username, password=bm_password)
    ents = nlu.analyze(text=text, features=[
        features.Entities(),
        features.Keywords(),
        features.Concepts()
    ])
    ents["tweet"] = text
    return ents
def nlp_parse(usr, password):
    # Build a Watson NLU client; return an empty list on failure.
    try:
        return watson_developer_cloud.NaturalLanguageUnderstandingV1(
            version='2017-02-27', username=usr, password=password)
    except Exception:
        return []
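# nlp_parse() returns an empty list when the client cannot be built, so
# callers should check for a falsy result. A usage sketch with
# placeholder credentials, assuming the `features` module is imported
# as in the other snippets in this file:
client = nlp_parse('<username>', '<password>')
if client:
    result = client.analyze(text='some text',
                            features=[features.Keywords()])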
# coding: utf-8

# In[4]:

import sys
import os
import json

sys.path.append(os.path.join(os.getcwd(), '..'))
import watson_developer_cloud
import watson_developer_cloud.natural_language_understanding.features.v1 as \
    features

# In[3]:

nlu = watson_developer_cloud.NaturalLanguageUnderstandingV1(
    version='2017-02-27', username='******', password='******')

# In[6]:

response = nlu.analyze(
    text='Bruce Banner is the Hulk and Bruce Wayne is BATMAN! '
         'Superman fears not Banner, but Wayne.',
    features=[features.Entities(), features.Keywords()])
print(json.dumps(response, indent=2))

# In[ ]:
# Watson Visual Recognition
# VISUAL_RECOGNITION_API_KEY = '<add_vr_api_key>'

# Watson Natural Language Understanding (NLU)
NATURAL_LANGUAGE_UNDERSTANDING_USERNAME = '******'
NATURAL_LANGUAGE_UNDERSTANDING_PASSWORD = '******'

# Watson Tone Analyzer
TONE_ANALYZER_USERNAME = '******'
TONE_ANALYZER_PASSWORD = '******'

# Create the Watson clients
nlu = watson_developer_cloud.NaturalLanguageUnderstandingV1(
    version='2017-02-27',
    username=NATURAL_LANGUAGE_UNDERSTANDING_USERNAME,
    password=NATURAL_LANGUAGE_UNDERSTANDING_PASSWORD)
# tone_analyzer = ToneAnalyzerV3(version='2016-05-19',
#                                username=TONE_ANALYZER_USERNAME,
#                                password=TONE_ANALYZER_PASSWORD)
# visual_recognition = VisualRecognitionV3(version='2018-05-22',
#                                          iam_apikey=VISUAL_RECOGNITION_API_KEY)

# **Insert to code > Insert pandas DataFrame**
df_data_1 = pd.read_csv(
    'C:\\Users\\Rafael\\Downloads\\us-consumer-finance-complaint-database\\consumer_complaints_only_with_narrative_100.csv',
    sep=',', error_bad_lines=False, index_col=False, dtype="unicode")
# coding: utf-8

# In[4]:

import sys
import os

sys.path.append(os.path.join(os.getcwd(), '..'))
import watson_developer_cloud
import watson_developer_cloud.natural_language_understanding.features.v1 as \
    features

# In[3]:

nlu = watson_developer_cloud.NaturalLanguageUnderstandingV1(
    version='2016-01-23', username='******', password='******')

# In[6]:

nlu.analyze(text='this is my experimental text. Bruce Banner is the Hulk'
                 ' and Bruce Wayne is BATMAN! Superman fears not Banner, '
                 'but Wayne.',
            features=[features.Entities(), features.Keywords()])

# In[ ]:
import sys
import os
import csv
import codecs

sys.path.append(os.path.join(os.getcwd(), '..'))
import watson_developer_cloud
import watson_developer_cloud.natural_language_understanding.features.v1 as features

nlu = watson_developer_cloud.NaturalLanguageUnderstandingV1(
    version='2017-02-27', username='******', password='******')

Channels = [
    "cityworld", "cnn", "dailytime", "empire", "foreignpolicy", "fox",
    "goneleft", "mutiny", "nbc", "news70", "newspolitics", "nytimes",
    "realnews", "truth", "usgreat", "webdaily", "whitehouse"
]

# Two output files, one for title sentiments and one for content sentiments.
Analyze_Files = ["SentimentTitle.csv", "SentimentContent.csv"]

# In FullData.csv, index 1 is the Title column and index 3 is the
# Content column; toggle `index` to switch between them.
# index = 1
index = 3
for value in Analyze_Files:
    # Analyze the title and content of each channel's FullData.csv.
    with codecs.open("CleanData" + value, 'w', "utf-8-sig") as outputFile:
# Scan `results` for the next quoted class name.
end = -1
while j < len(results):
    if results[j] == '"' and start == -1:
        start = j
    elif results[j] == '"' and end == -1:
        end = j
        classes += [results[start + 1:end]]
        break
    j += 1
i = results.find(label, i + 1)

# Run Watson NLU entity extraction over each detected class name.
s = ""
for k in classes:
    nlu = watson_developer_cloud.NaturalLanguageUnderstandingV1(
        version='2017-02-27', username='******', password='******')
    h = nlu.analyze(text=k, features=[features.Entities()])

# If a person was detected, run Watson Visual Recognition face
# detection on the captured still frame.
names = []
if "person" in classes:
    face_results = None
    with open('stillframe.jpg', 'rb') as images_file2:
        json_obj2 = visual_recognition.detect_faces(
            images_file=images_file2)
        face_results = str(json.dumps(json_obj2, indent=2))
    name = '"name"'
    i2 = results.find(name)
    while i2 != -1:
        j2 = i2 + len(label)
def identify_artist(self):
    # Use the IBM Watson Natural Language Understanding API to pull
    # entities and keywords out of the recognized speech.
    nlu = watson_developer_cloud.NaturalLanguageUnderstandingV1(
        version=self.WDC_NLUV1_VERSION,
        username=self.WDC_NLUV1_USERNAME,
        password=self.WDC_NLUV1_PWD)
    ret = nlu.analyze(text=self.speech,
                      features=[features.Entities(), features.Keywords()])
    pprint(ret)

    keywords = ret['keywords']
    key = []
    for elem in keywords:
        key.append(elem['text'])

    # If "play" appears in any keyword (or anywhere in the speech),
    # check whether an artist or music group is present in the other
    # keywords; if so, use those keywords as the song search text.
    play_action = False
    for elem in key:
        if 'play' in elem.lower():
            play_action = True
    if not play_action:
        if 'play' in self.speech.lower():
            play_action = True

    # If play_action is True, build the query for the search_video function.
    if play_action:
        search_query = ''
        entities = ret['entities']
        entities = entities[0]
        ty = entities['type']
        text = entities['text']
        if ty == 'MusicGroup' or ty == 'Person' or ty == 'Company':
            # The entity names a particular artist or musical band.
            search_query += text + ' '
            for elem in key:
                if 'play' not in elem.lower():
                    if text not in elem.lower():
                        search_query += elem + ' '
        print(search_query)
        return search_query
import sys
import os

sys.path.append(os.path.join(os.getcwd(), '..'))
import watson_developer_cloud
import watson_developer_cloud.natural_language_understanding.features.v1 as features

nlu = watson_developer_cloud.NaturalLanguageUnderstandingV1(
    version='2017-02-27', username='******', password='******')
h = nlu.analyze(text='apple',
                features=[features.Entities(), features.Keywords()])
print(h)
import sys
import os
import json

sys.path.append(os.path.join(os.getcwd(), '..'))
import watson_developer_cloud
import watson_developer_cloud.natural_language_understanding.features.v1 as features

nlu = watson_developer_cloud.NaturalLanguageUnderstandingV1(
    version='2017-02-27', username='******', password='******')
print(dir(features))
response = nlu.analyze(text='what is number of open tickets ?',
                       features=[features.Relations(), features.Keywords()])
print(json.dumps(response, indent=2))
import cgi
import sys
import time
import threading
import multiprocessing
from multiprocessing import *
import ast

import boto.sqs
from boto.sqs.message import Message
import boto.sns
from requests_aws4auth import AWS4Auth

import watson_developer_cloud
import watson_developer_cloud.natural_language_understanding.features.v1 as features

nlu = watson_developer_cloud.NaturalLanguageUnderstandingV1(
    version='2017-02-27', username='******', password='******')

# AWS credentials redacted; supply your own access keys.
conn = boto.sqs.connect_to_region(
    "us-west-2",
    aws_access_key_id='******',
    aws_secret_access_key='******')
q = conn.create_queue('tweets-queue')

host = 'search-twittmap-mtvfhojv7a2yfejtlkpqrvetcy.us-west-2.es.amazonaws.com'
awsauth = AWS4Auth('******', '******', 'us-west-2', 'es')
def __init__(self, username, password):
    self.nlu = watson_developer_cloud.NaturalLanguageUnderstandingV1(
        version='2017-02-27', username=username, password=password)
    self.features = []
    self.queries = []
    self.documents = []
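# Usage sketch for the constructor above, assuming it belongs to a
# wrapper class; the name NLUWrapper and the call pattern are
# hypothetical, not from the source:
wrapper = NLUWrapper('<username>', '<password>')
wrapper.features.append(features.Keywords())
response = wrapper.nlu.analyze(text='some text', features=wrapper.features)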
def analysis(text):
    natural_language_understanding = watson_developer_cloud.NaturalLanguageUnderstandingV1(
        username="", password="", version="2017-09-22")

    # Sample inputs this parser is meant to handle:
    # text = "IBM (http://www.ibm.com) is an American multinational technology company headquartered in Armonk, New York, United States, with operations in over 170 countries. It will hold a Career Fair on tomorrow from 2:00AM to 3:00PM"
    # text = "We are pleased to announce that we are now accepting poster proposals for consideration to the Data Science Conference (www.dsconference.rice.edu) The poster session, to be held on tomorrow 16:00 - 18:00 p.m., is a unique networking opportunity for students. This will take place at RMC, Rice University, Houston, Texas."
    # text = "9/23/2017 is the third installment of Houston Food Tour! Catch a ride to for a delicious meal, an opportunity to get some shopping done, and as always, fun with friends from America and around the world! Someone from Korea will share a little about Korean cuisine! We will meet at China Town, Houston. The activity will start at 7:00PM and will go back before 9:00PM."

    response = natural_language_understanding.analyze(
        text=text,
        features=[
            Features.Keywords(sentiment=True, limit=5),
            Features.Entities(sentiment=True)
        ])

    today = datetime.date.today()
    year = int(today.year)
    month = int(today.month)
    day = int(today.day)

    # Recognized date formats: 2017-9-23, 9/23/2017, or the word "tomorrow".
    regexDate = re.findall(r'\d+\S\d+\S\d+', text)
    if len(regexDate):
        date = regexDate[0]
        date1 = date.split('-')
        date2 = date.split('/')
        if len(date1) != 1:
            year = int(date1[0])
            month = int(date1[1])
            day = int(date1[2])
        if len(date2) != 1:
            year = int(date2[2])
            month = int(date2[0])
            day = int(date2[1])
    else:
        regexDate = re.findall(r'tomorrow', text)
        if len(regexDate):
            day += 1

    # Recognized time ranges: "2:00AM - 3:00PM" or 24-hour "2:00 - 3:00".
    # Default to midnight so the date-time strings below can still be
    # built when no time range appears in the text.
    startHour = startMin = endHour = endMin = 0
    regexDate = re.findall(r'\d:\d\dAM|\d\d:\d\dAM|\d:\d\dPM|\d\d:\d\dPM', text)
    if len(regexDate) == 2:
        startTime = regexDate[0]
        endTime = regexDate[1]
        time1 = startTime.split(':')
        time2 = endTime.split(':')
        if time1[1][len(time1[1]) - 2:len(time1[1])] == "PM":
            startHour = int(time1[0]) + 12
        else:
            startHour = int(time1[0])
        startMin = int(time1[1][0:len(time1[1]) - 2])
        if time2[1][len(time2[1]) - 2:len(time2[1])] == "PM":
            endHour = int(time2[0]) + 12
        else:
            endHour = int(time2[0])
        endMin = int(time2[1][0:len(time2[1]) - 2])
    else:
        regexDate = re.findall(r'\d:\d\d|\d\d:\d\d', text)
        if len(regexDate) == 2:
            startTime = regexDate[0]
            endTime = regexDate[1]
            time1 = startTime.split(':')
            time2 = endTime.split(':')
            startHour = int(time1[0])
            startMin = int(time1[1])
            endHour = int(time2[0])
            endMin = int(time2[1])

    # Collect keywords and location entities from the NLU response.
    entities = response['entities']
    keywords = response['keywords']
    keywordsList = []
    positionList = []
    for i in range(len(keywords)):
        keyword = keywords[i]
        keywordsList.append(keyword['text'])
    for i in range(len(entities)):
        entity = entities[i]
        if entity['type'] == "Location":
            positionList.append(entity['text'])
        else:
            if 'disambiguation' in entity:
                subtype = entity['disambiguation']['subtype']
                for j in range(len(subtype)):
                    if subtype[j] == "Location":
                        positionList.append(entity['text'])
                        break

    gmaps = GoogleMaps("******")

    # Build "YYYY-MM-DD HH:MM:SS" start and end strings.
    date = ""
    date += str(year)
    if month < 10:
        date += ("-0" + str(month))
    else:
        date += ("-" + str(month))
    if day < 10:
        date += ("-0" + str(day))
    else:
        date += ("-" + str(day))
    startDateTime = date + " "
    endDateTime = date + " "
    if startHour < 10:
        startDateTime += "0" + str(startHour)
    else:
        startDateTime += str(startHour)
    if startMin < 10:
        startDateTime += ":0" + str(startMin)
    else:
        startDateTime += ":" + str(startMin)
    startDateTime += ":00"
    if endHour < 10:
        endDateTime += "0" + str(endHour)
    else:
        endDateTime += str(endHour)
    if endMin < 10:
        endDateTime += ":0" + str(endMin)
    else:
        endDateTime += ":" + str(endMin)
    endDateTime += ":00"

    # Geocode the concatenated location entities.
    address = ""
    for i in range(len(positionList)):
        address += positionList[i] + " "
    address = address.rstrip()
    geoLocation = gmaps.geocode(address)[0]['geometry']['location']
    lat = geoLocation['lat']
    lng = geoLocation['lng']
    gps = str(lat) + " " + str(lng)

    # Extract the first URL in the text, if any.
    url = ""
    r = re.compile(
        r'(?i)\b((?:[a-z][\w-]+:(?:/{1,3}|[a-z0-9%])|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:\'".,<>?«»“”‘’]))'
    )
    url_list = r.findall(text)
    if len(url_list) != 0:
        url = url_list[0][0]

    return startDateTime, endDateTime, address, gps, text, url, keywordsList