def get_keywords(text):
    natural_language_understanding = NaturalLanguageUnderstandingV1(
        username="******",
        password="******",
        version="2017-02-27")
    response = natural_language_understanding.analyze(
        text=text,
        features=[
            Features.Entities(emotion=True, sentiment=True, limit=10),
            Features.Keywords(emotion=True, sentiment=True, limit=10)
        ])
    # print(json.dumps(response, indent=2))
    to_ret = {}
    # Keywords carry no linked resource, so map them to a placeholder.
    for keyword in response["keywords"]:
        to_ret[keyword['text']] = '#'
    # Entities may carry a DBpedia link; fall back to the placeholder
    # when the disambiguation data is missing.
    for entity in response["entities"]:
        try:
            to_ret[entity['text']] = entity['disambiguation']['dbpedia_resource']
        except KeyError:
            to_ret[entity['text']] = '#'
    return to_ret
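# All snippets in this section target the pre-1.0 watson_developer_cloud SDK
# and assume roughly the setup below. This is a minimal sketch: the
# credentials are placeholders, and whether the features module is aliased
# as `Features` or `features` varies from snippet to snippet.
import json

from watson_developer_cloud import NaturalLanguageUnderstandingV1
import watson_developer_cloud.natural_language_understanding.features.v1 as Features

natural_language_understanding = NaturalLanguageUnderstandingV1(
    username="******",       # placeholder credentials
    password="******",
    version="2017-02-27")    # API version date used throughout this section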
def eval_default():
    response = nlu.analyze(
        text='Bruce Banner is the Hulk and Bruce Wayne is BATMAN! '
             'Superman fears not Banner, but Wayne.',
        features=[features.Entities(), features.Keywords()])
    return jsonify(response)
def getSentimentAnalysis(url):
    """
    Get sentiment analysis of a given URL
    :param: url: URL to get sentiment analysis
    :returns: Sentiment magnitude and emotion analysis in JSON format
    """
    html = urllib.request.urlopen(url).read()
    text = textFromHtml(html)
    natural_language_understanding = nl.enableWatsonNatLang()
    response = natural_language_understanding.analyze(
        text=text,
        features=[
            # Get general sentiment of text
            Features.Sentiment(document=True),
            # Get emotion towards relevant entities (max: 3)
            Features.Entities(emotion=True, limit=3)
        ])
    return json.dumps(response, indent=2)
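# textFromHtml() is not defined in the snippet above. A minimal sketch of
# what it presumably does, using BeautifulSoup's get_text(); this helper is
# an assumption, not the original implementation:
from bs4 import BeautifulSoup

def textFromHtml(html):
    soup = BeautifulSoup(html, 'html.parser')
    # Drop script/style blocks, then collapse the rest to visible text.
    for tag in soup(['script', 'style']):
        tag.decompose()
    return ' '.join(soup.get_text().split())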
def main():
    credentials = eval(open(CREDENTIALS, "r").read())
    natural_language_understanding = NaturalLanguageUnderstandingV1(
        version="2017-02-27",
        username=credentials["username"],
        password=credentials["password"])
    listOfFiles = [f for f in listdir(PATH) if isfile(join(PATH, f))]
    keywords = ["convict", "acquit", "guilty", "innocen", "sentenc"]
    for fileName in listOfFiles:
        with open(join(PATH, fileName), "r") as fileIn:
            data = json.load(fileIn)
        val = data["html_lawbox"]
        if any(word in val for word in keywords):
            print("Good file: ", fileName)
            for word in keywords:
                if word in val:
                    print("it has ", word)
            response = natural_language_understanding.analyze(
                text=data["html_lawbox"],
                features=[features.Entities(), features.Relations()])
            print(json.dumps(response, indent=2))
        else:
            print("Bad file: ", fileName)
def watsonanalyze(stringinput):
    response = natural_language_understanding.analyze(
        text=stringinput,
        features=[
            Features.Entities(emotion=True, sentiment=True, limit=2),
            Features.Keywords(emotion=True, sentiment=True, limit=20)
        ])
    # analyze() already returns a JSON-decoded dict, so no round-trip
    # through json.dumps()/json.loads() is needed.
    return response
def nlp(input_stuff):
    natural_language_understanding = NaturalLanguageUnderstandingV1(
        version='2017-02-27',
        username='******',
        password='******')
    response = natural_language_understanding.analyze(
        text=input_stuff,
        features=[features.Entities(), features.Keywords()])
    return response["entities"]
def nl_processing(reqd_text):
    response = natural_language_understanding.analyze(
        text=reqd_text,
        features=[
            features.Entities(),
            features.Keywords(),
            features.Emotion(),
            features.Concepts(),
            features.Sentiment()
        ])
    return response
def call_nlgu(text_stmt):
    Config = configparser.ConfigParser()
    Config.read("watson.ini")
    userid = ConfigSectionMap(Config, "Natural Language Understanding-RAI")['username']
    pwd = ConfigSectionMap(Config, "Natural Language Understanding-RAI")['password']
    natural_language_understanding = NaturalLanguageUnderstandingV1(
        version='2017-02-27', username=userid, password=pwd)
    response = natural_language_understanding.analyze(
        text=text_stmt,
        features=[features.Entities(), features.Keywords()])
    try:
        print(json.dumps(response, indent=2))
    except (TypeError, ValueError):
        return
def worker():
    global q
    print('Worker Initialized')
    attributes = ['id', 'text', 'time', 'lat', 'lon']
    while True:
        responses = q.receive_messages(MessageAttributeNames=attributes)
        if len(responses) != 0:
            for response in responses:
                # Discard messages that carry no attributes.
                if response.message_attributes is None:
                    response.delete()
                    continue
                id = response.message_attributes.get('id').get('StringValue')
                text = response.message_attributes.get('text').get('StringValue')
                time = response.message_attributes.get('time').get('StringValue')
                lat = response.message_attributes.get('lat').get('StringValue')
                lon = response.message_attributes.get('lon').get('StringValue')
                # Classify the tweet's sentiment; fall back to neutral on failure.
                try:
                    natural_language_understanding = NaturalLanguageUnderstandingV1(
                        version='2017-02-27',
                        username=nlu_creds['username'],
                        password=nlu_creds['password'])
                    nlu_response = natural_language_understanding.analyze(
                        text=text,
                        features=[features.Entities(), features.Keywords(),
                                  features.Sentiment()])
                    sentiment = nlu_response['sentiment']['document']['label']
                except Exception as e:
                    print('Error:', e)
                    sentiment = 'neutral'
                # Send to AWS SNS
                notification = {
                    'id': id,
                    'text': text,
                    'time': time,
                    'lat': lat,
                    'lon': lon,
                    'sentiment': sentiment
                }
                try:
                    print(notification)
                    sns.publish(TargetArn=sns_arn,
                                Message=json.dumps({'default': json.dumps(notification)}))
                    response.delete()
                except Exception as e:
                    print('Error:', e)
        sleep(2)
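# worker() relies on module-level globals that the snippet does not define.
# A minimal sketch of the assumed setup; the queue name, topic ARN, and
# credentials are placeholders, not values from the original source:
import boto3
from time import sleep

sqs = boto3.resource('sqs')
q = sqs.get_queue_by_name(QueueName='tweets')            # hypothetical queue
sns = boto3.client('sns')
sns_arn = 'arn:aws:sns:us-east-1:123456789012:tweets'    # placeholder ARN
nlu_creds = {'username': '******', 'password': '******'}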
def extract_sentiment_ner_trec_full(src, dst_dir):
    """
    Extracts tweet overall sentiment, sentiment per NER, NERs, keywords,
    sentiment per keyword for the full dataset that's read from a .txt file.

    Parameters
    ----------
    src: str - path to dataset.
    dst_dir: - directory in which results will be stored.

    """
    tweets = read_txt(src)
    # Since tweets are ordered according to topic, label them in a
    # random order
    keys = list(tweets.keys())
    random.shuffle(keys)
    for idx, tid in enumerate(keys):
        fname = "{}.json".format(tid)
        dst = os.path.join(dst_dir, fname)
        # If file already exists, data was extracted before and due to
        # rate-limiting the rest couldn't be extracted
        if not os.path.isfile(dst):
            try:
                # Extract features for a tweet via Watson
                response = natural_language_understanding.analyze(
                    text=tweets[tid]["text"],
                    features=[
                        # Overall tweet sentiment
                        Features.Sentiment(),
                        # NER detection and sentiment per NER
                        Features.Entities(sentiment=False),
                        Features.Keywords(sentiment=False),
                    ])
                # Store results in UTF-8 encoding
                with codecs.open(dst, "w", encoding="utf-8") as f:
                    # https://stackoverflow.com/questions/18337407/saving-utf-8-texts-in-json-dumps-as-utf8-not-as-u-escape-sequence
                    data = json.dumps(response, ensure_ascii=False)
                    f.write(data)
            # Unsupported language
            except watson_developer_cloud.watson_developer_cloud_service.WatsonException:
                pass
        print("Finished extraction for {} tweets".format(idx + 1))
def featureList(self, tags):
    f_list = []
    for tag in tags:
        if tag == "sentiment":
            f_list.append(features.Sentiment())
        elif tag == "categories":
            f_list.append(features.Categories())
        elif tag == "concepts":
            f_list.append(features.Concepts())
        elif tag == "emotion":
            f_list.append(features.Emotion())
        elif tag == "entities":
            f_list.append(features.Entities())
    return f_list
def nlu(text):
    response = n.analyze(
        text=text,
        features=[
            features.Emotion(),
            features.Concepts(),
            features.Categories(),
            features.Entities(),
            features.Keywords(),
            features.SemanticRoles(),
            features.Relations(),
            features.Sentiment()
        ],
        language='en')
    return json.dumps(response, indent=2)
def map_feature(name):
    feature_name_mappings = {
        'keywords': features.Keywords(),
        'entities': features.Entities(),
        'concepts': features.Concepts(),
        'categories': features.Categories(),
        'sentiment': features.Sentiment(),
        'emotion': features.Emotion()
    }
    if name in feature_name_mappings:
        return feature_name_mappings[name]
    else:
        print("Invalid feature name")
        return None
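# A hypothetical caller for map_feature() above: build a feature list from
# user-supplied names, skipping invalid ones, then run an analysis.
# `natural_language_understanding` is assumed to be a configured client.
def analyze_with(names, text):
    feats = [f for f in (map_feature(n) for n in names) if f is not None]
    return natural_language_understanding.analyze(text=text, features=feats)

# Example: analyze_with(['keywords', 'sentiment'], "IBM Watson ships NLU APIs.")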
def getEntity(self, text):
    natural_language_understanding = NaturalLanguageUnderstandingV1(
        username="******",
        password="******",
        version="2017-02-27")
    response = natural_language_understanding.analyze(
        text=text,
        features=[Features.Entities(sentiment=True)])
    output = []
    # Collect (surface text, entity type) pairs from the response.
    for word in response["entities"]:
        output.append((word['text'], word['type']))
    return output
def company_finder(text_in: str):
    natural_language_understanding = NaturalLanguageUnderstandingV1(
        version='2017-02-27',
        username='******',
        password='******')
    response = natural_language_understanding.analyze(
        text=text_in,
        features=[features.Entities()])
    number_of_companies = 0
    companies = []
    for entity in response["entities"]:
        if entity['type'] == 'Company':
            number_of_companies += 1
            companies.append(entity['text'])
    return number_of_companies, companies
def extract_data(text):
    # Use Watson's NLU API to extract the keywords, entities and concepts from a text
    bm_username = "******"
    bm_password = "******"
    nlu = watson_developer_cloud.NaturalLanguageUnderstandingV1(
        version='2017-02-27',
        username=bm_username,
        password=bm_password)
    ents = nlu.analyze(
        text=text,
        features=[
            features.Entities(),
            features.Keywords(),
            features.Concepts()
        ])
    ents["tweet"] = text
    return ents
def watson_sentiments(url_news):
    natural_language_understanding = NaturalLanguageUnderstandingV1(
        username="******",
        password="******",
        version="2017-02-27")
    response = natural_language_understanding.analyze(
        url=url_news,
        features=[
            # Entities options
            Features.Entities(sentiment=True, limit=1)
        ])
    return response
def test_model():
    model = request.forms.get('model')
    natural_language_understanding = NaturalLanguageUnderstandingV1(
        version='2017-02-27',
        username="******",
        password='******')
    response = natural_language_understanding.analyze(
        text=model,
        features=[
            features.Entities(),
            features.Keywords(),
            features.Concepts()
        ])
    return json.dumps(response, indent=2)
def extract_sentiment_ner_twitter(cleaned, min_annos, dst_dir):
    """
    Extracts tweet overall sentiment, sentiment per NER, NERs, keywords,
    sentiment per keyword.

    Parameters
    ----------
    cleaned: bool - True if only cleaned data should be used (i.e. any
    additional labels (and their annotation times) assigned to tweets
    considered "Irrelevant" are ignored)
    min_annos: int - minimum number of annotators who must've labeled a
    tweet for it to be considered.
    dst_dir: - directory in which results will be stored.

    """
    tweets = read_twitter(cleaned, min_annos)
    for idx, tid in enumerate(tweets):
        tweet = tweets[tid]
        fname = "{}.json".format(tid)
        dst = os.path.join(dst_dir, fname)
        print(tweet["text"])
        # If file already exists, data was extracted before and due to
        # rate-limiting the rest couldn't be extracted
        if not os.path.isfile(dst):
            # Extract features for a tweet via Watson
            response = natural_language_understanding.analyze(
                text=tweet["text"],
                features=[
                    # Overall tweet sentiment
                    Features.Sentiment(),
                    # NER detection and sentiment per NER
                    Features.Entities(sentiment=True),
                    Features.Keywords(sentiment=True),
                ])
            # Store results in UTF-8 encoding
            with codecs.open(dst, "w", encoding="utf-8") as f:
                # https://stackoverflow.com/questions/18337407/saving-utf-8-texts-in-json-dumps-as-utf8-not-as-u-escape-sequence
                data = json.dumps(response, ensure_ascii=False)
                f.write(data)
        print("Finished extraction for {} tweets".format(idx + 1))
def clasificarDescripcion(pDescripcion):
    # if 'VCAP_SERVICES' in os.environ:
    #     services = json.loads(os.getenv('VCAP_SERVICES'))
    with open(r'AutosClasificados\core\config.json') as json_data_file:
        vConfig = json.load(json_data_file)
    vAPIUserNLU = vConfig["watsonNLU"]["vAPIUser"]
    vAPIPassNLU = vConfig["watsonNLU"]["vAPIPass"]
    vAPIVersionNLU = vConfig["watsonNLU"]["vAPIVersion"]
    vUmbralMinScore_WNLU = vConfig["watsonNLU"]["vUmbralMinScore_WNLU"]
    vUmbralMinDescripcion = vConfig["otros"]["vUmbralMinDescripcion"]
    vResultado_NLU = ''
    vWatson_NLU = NaturalLanguageUnderstandingV1(
        username=vAPIUserNLU,
        password=vAPIPassNLU,
        version=vAPIVersionNLU)
    vListaKeywords = list()
    try:
        # Only analyze descriptions longer than the configured threshold.
        if len(pDescripcion) > vUmbralMinDescripcion:
            vResultado_NLU = vWatson_NLU.analyze(
                text=pDescripcion,
                features=[
                    Features.Entities(emotion=True, sentiment=True, limit=6),
                    Features.Keywords(emotion=True, sentiment=True, limit=6)
                ],
                language="en")
            vResultado_NLU = json.loads(json.dumps(vResultado_NLU, indent=2))
            if vResultado_NLU['keywords']:
                for entitien in vResultado_NLU['entities']:
                    print(entitien)
                # Keep only keywords whose relevance clears the minimum score.
                for vResultado in vResultado_NLU['keywords']:
                    print(vResultado)
                    if vResultado['relevance'] > vUmbralMinScore_WNLU:
                        vListaKeywords.append(vResultado['text'])
        return vListaKeywords
    except Exception:
        vListaKeywords.append('No hay Keywords disponibles')
        return vListaKeywords
def respond_chat(request):
    print("respond_chat got called")
    global response
    global inp
    # Getting the last context for the concerned user
    last_context = Mess.objects.filter(user=request.user).last().context
    # print(last_context.last().created)
    # Sending the message to the bot and fetching a response
    print("INP------------")
    print(inp)
    nlu_response = nlu.analyze(
        text=inp,
        features=[features.Entities(), features.Keywords()])
    if last_context == "":
        response = conversation.message(workspace_id=workspace_id,
                                        message_input={'text': inp})
    else:
        response = conversation.message(workspace_id=workspace_id,
                                        message_input={'text': inp},
                                        context=eval(last_context))
    action = identifier(response, nlu_response)
    print("ACTION REQUESTED")
    print(action)
    # CHECK WHAT THE ACTION IS
    # print(type(response['intents']))
    # if response['intents']['intent'][0] == "create_playlist":
    #     print("USER WANTS A PLAYLIST MAN")
    new_mess = Mess(text=response['output']['text'][0],
                    user=request.user,
                    created=timezone.now(),
                    reality_coefficient=False,
                    context=repr(response['context']))
    new_mess.save()
    response_text = serializers.serialize('json', Mess.objects.all())
    return HttpResponse(response_text, content_type='application/json')
def report(self, text):
    """ Returns the Watson Data for a specific text. """
    # Real Call
    payload = self.natural_language_understanding.analyze(
        text=text,
        features=[
            features.Entities(),
            features.Keywords(),
            features.Emotion()
        ])
    # Fake Call, since we only have limited access to IBM
    # payload = self.mock_watson(text)
    return payload
def get_sentiment(txt):
    response = natural_language_understanding.analyze(
        text=txt,
        features=[
            Features.Entities(emotion=True, sentiment=True, limit=2),
            Features.Keywords(emotion=True, sentiment=True, limit=2)
        ])
    print(json.dumps(response, indent=2))
def analyze(text, threshold=0.5):
    # Strip non-ASCII characters before sending the text to Watson.
    text = text.encode('ascii', errors='ignore').decode('ascii')
    natural_language_understanding = NaturalLanguageUnderstandingV1(
        version='2017-02-27',
        username='******',
        password='******')
    response = natural_language_understanding.analyze(
        text=text,
        features=[
            features.Entities(),
            features.Keywords(),
            features.Concepts(),
            features.Sentiment()
        ])
    # analyze() already returns a JSON-decoded dict.
    language = response["language"]
    keywords = response["keywords"]
    entities = response["entities"]
    concepts = response["concepts"]
    sentiment = response["sentiment"]
    # Keep only results at or above the relevance threshold, most relevant first.
    keywords = sorted(keywords, key=lambda x: -x['relevance'])
    keywords = [k['text'] for k in keywords if k['relevance'] >= threshold]
    entities = sorted(entities, key=lambda x: -x['relevance'])
    entities = [(e['type'], e['text']) for e in entities if e['relevance'] >= threshold]
    concepts = sorted(concepts, key=lambda x: -x['relevance'])
    concepts = [c['text'] for c in concepts if c['relevance'] >= threshold]
    sentiment = (sentiment['document']['label'], sentiment['document']['score'])
    # The original snippet ended here without a return; hand the extracted
    # fields back to the caller.
    return language, keywords, entities, concepts, sentiment
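# A hypothetical call against analyze() as written above; the sample text
# and the unpacked names are illustrative only.
lang, kws, ents, cons, sent = analyze(
    "IBM Watson provides natural language understanding.", threshold=0.5)
print(lang)   # e.g. 'en'
print(sent)   # e.g. ('positive', 0.78)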
def main(params):
    natural_language_understanding = NaturalLanguageUnderstandingV1(
        username=params["username"],
        password=params["password"],
        version=params["version"])
    response = natural_language_understanding.analyze(
        url=params["url"],
        features=[
            Features.Concepts(limit=1),
            Features.Entities(limit=1),
            Features.Keywords(limit=1),
            Features.Categories(),
            Features.Emotion(),
            Features.Sentiment(),
            Features.MetaData(),
            Features.Relations(),
            Features.SemanticRoles(limit=1)
        ])
    return response
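# A hypothetical parameter dict for main() above; every value is a
# placeholder, not taken from the original source.
params = {
    "username": "******",
    "password": "******",
    "version": "2017-02-27",
    "url": "https://example.com/article.html",
}
result = main(params)
print(json.dumps(result, indent=2))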
def process(line):
    # Replace some known non-ASCII characters with ASCII equivalents.
    line = re.sub("\u2019", "'", line)  # U+2019 (right single quotation mark)
    line = re.sub("\u2013", "-", line)  # U+2013 (en dash)
    # Remove the rest of the non-ASCII characters.
    line = re.sub(r'[^\x00-\x7F]+', ' ', line)
    if len(line) > 0:
        response = client.analyze(text=line, features=[Features.Entities()])
        tags = set()
        print(json.dumps(response, indent=2))
        # Look up additional tags for each detected entity.
        for entity in response['entities']:
            name = entity['text']
            tags.update(query(name))
        return ",".join(tags)
    else:
        return ""
def get_nlu(data):
    response = natural_language_understanding.analyze(
        text=data,
        features=[
            Features.Entities(emotion=True, sentiment=True, limit=2),
            Features.Keywords(emotion=True, sentiment=True, limit=2),
            Features.Emotion(),
        ])
    return response
def execute_watson_request(text):
    natural_language_understanding = NaturalLanguageUnderstandingV1(
        username=constants.WATSON_USER,
        password=constants.WATSON_PASS,
        version="2017-02-27")
    try:
        response = natural_language_understanding.analyze(
            text=text,
            features=[
                features.Concepts(),
                features.Categories(),
                features.Emotion(),
                features.Entities(emotion=True, sentiment=True),
                features.Keywords(emotion=True, sentiment=True),
                features.Sentiment()
            ])
        return response
    except WatsonException as error:
        return str(error)
def understand_text(self):
    natural_language_understanding = NaturalLanguageUnderstandingV1(
        username=nlu_settings.get("username"),
        password=nlu_settings.get("password"),
        version="2017-02-27")
    self.nl_understanding = natural_language_understanding.analyze(
        text=self.converted_text,
        features=[
            Features.Entities(emotion=True, sentiment=True, limit=100),
            Features.Keywords(emotion=True, sentiment=True, limit=100),
            Features.Categories(),
            Features.Concepts(),
            Features.Sentiment(),
            Features.Emotion(),
            # Features.Feature(),
            # Features.MetaData(),
            Features.Relations(),
            Features.SemanticRoles(),
        ])
    return self.nl_understanding