def test(): set_api_key("write your api key here") similarity("Sachin is the greatest batsman", "Tendulkar is the finest cricketer") sentiment("Come on, lets play together") ner("Narendra Modi is the prime minister of India") keywords( "Prime Minister Narendra Modi tweeted a link to the speech Human Resource Development Minister Smriti Irani made in the Lok Sabha during the debate on the ongoing JNU row and the suicide of Dalit scholar Rohith Vemula at the Hyderabad Central University." ) emotion("Did you hear the latest Porcupine Tree song ? It's rocking !") intent( "Finance ministry calls banks to discuss new facility to drain cash") abuse("you f**king a$$hole") batch_intent([ "drugs are fun", "don\'t do drugs, stay in school", "lol you a f*g son", "I have a throat infection" ]) batch_abuse([ "drugs are fun", "don\'t do drugs, stay in school", "lol you a f*g son", "I have a throat infection" ]) batch_ner([ "drugs are fun", "don\'t do drugs, stay in school", "lol you a f*g son", "I have a throat infection" ]) batch_sentiment([ "drugs are fun", "don\'t do drugs, stay in school", "lol you a f*g son", "I have a throat infection" ]) batch_phrase_extractor([ "drugs are fun", "don\'t do drugs, stay in school", "lol you a f*g son", "I have a throat infection" ])
def test():
    similarity("Sachin is the greatest batsman", "Tendulkar is the finest cricketer")
    sentiment("Come on, lets play together")
    taxonomy("Narendra Modi is the prime minister of India")
    ner("Narendra Modi is the prime minister of India")
    keywords(
        "Prime Minister Narendra Modi tweeted a link to the speech Human Resource Development Minister Smriti Irani made in the Lok Sabha during the debate on the ongoing JNU row and the suicide of Dalit scholar Rohith Vemula at the Hyderabad Central University."
    )
    emotion("Did you hear the latest Porcupine Tree song ? It's rocking !")
    intent("Finance ministry calls banks to discuss new facility to drain cash")
    abuse("you f**king a$$hole")
def tryNER(request):
    if request.method == 'POST':
        sentence = request.POST.get('sent')
        data = ner(sentence)
        nero = data['entities']
        print(nero)
        return render(request, 'trial.html', {'nero': nero})
def main(self):
    if len(sys.argv) < 2:  # Check text has been passed in via the command line.
        print("You have not passed in text to perform NER on.")
    else:
        """Perform ner on the argument which has been passed into the command line."""
        print(ner(sys.argv[1]))
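# A minimal sketch of how the command-line NER snippet above might be wired up as a
# standalone script. The __main__ guard, the placeholder API key, and the file name in
# the example invocation are assumptions, not part of the original code.
import sys
from paralleldots import set_api_key, ner

if __name__ == "__main__":
    set_api_key("write your api key here")  # placeholder, as in the test() snippets
    if len(sys.argv) < 2:
        print("You have not passed in text to perform NER on.")
    else:
        print(ner(sys.argv[1]))

# Hypothetical invocation:
#   python ner_cli.py "Narendra Modi is the prime minister of India"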
def group_extractor(text):
    response = paralleldots.ner(text, lang_code)
    # print(response)
    l = [
        el['name'] for el in response['entities']
        if el['category'] == 'group' or el['category'] == 'place'
    ]
    return l
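# A minimal usage sketch for group_extractor() above. It assumes lang_code is a
# module-level variable (it is not defined in the snippet) and that the API key has
# already been set; both the placeholder key and the "en" language code are assumptions.
import paralleldots

paralleldots.set_api_key("write your api key here")
lang_code = "en"

groups_and_places = group_extractor("Narendra Modi is the prime minister of India")
print(groups_and_places)  # list of entity names tagged as 'group' or 'place'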
def get_ner(text):
    '''
    Get NER chunks from text
    @return {
        'person': [],
        'location': [],
        'organization': []
    }
    '''
    ner_api_return = paralleldots.ner(text)
    ner_result = {'person': [], 'location': [], 'organization': []}
    if 'entities' in ner_api_return:
        ner_result = _transform_ner_api_resp(ner_api_return['entities'])
    return ner_result
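# _transform_ner_api_resp() is not shown in the snippet above. The sketch below is a
# hypothetical implementation, assuming the API's 'name'/'place'/'group' categories
# (the ones used elsewhere in these snippets) map onto the person/location/organization
# buckets that get_ner() documents.
def _transform_ner_api_resp(entities):
    result = {'person': [], 'location': [], 'organization': []}
    category_map = {'name': 'person', 'place': 'location', 'group': 'organization'}
    for entity in entities:
        bucket = category_map.get(entity.get('category'))
        if bucket is not None:
            result[bucket].append(entity['name'])
    return result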
def redictedText(text):
    paralleldots.set_api_key("RPy5b3CAGG60DZvjDRHNcVKEzybZvlUQF3zEUntDHIU")
    possibleGender = [
        'he', 'she', 'male', 'female', 'transgender', 'lesbain', 'his', 'her',
        'him'
    ]
    # print("Enter any input")
    # tokenized = sent_tokenize(text)
    # print(tokenized)
    tokenizer = RegexpTokenizer(r'\w+')
    txt = tokenizer.tokenize(text)
    resultedText = " ".join(txt)
    # print(resultedText)
    aadhar = re.findall(r'\d{1,12}', text)
    num = []
    num += aadhar
    num += possibleGender
    # print(num)
    print("\n")
    try:
        wordsFromApi = []
        response = paralleldots.ner(resultedText)
        # print(response)
        # print(response['entities'])
        res = response['entities']
        for i in range(len(res)):
            dic = res[i]
            # print(dic['name'])
            wordsFromApi += dic['name'].split()
        num += wordsFromApi
        # print(num)
    except Exception as e:
        print("failed to recognize your text. please check your internet connection :(")
    redictedText = ""
    resultedList = resultedText.split()
    for i in resultedList:
        # print(i)
        if (i in num) or (i.lower() in num):
            redictedText += "*"
        else:
            redictedText += i
        redictedText += " "
    # using IBM api
    natural_language_understanding = NaturalLanguageUnderstandingV1(
        username='******', password='******', version='2018-11-16')
    response = natural_language_understanding.analyze(
        text=text,
        features=Features(entities=EntitiesOptions(sentiment=True))).get_result()
    resultFromIBM = []
    relevanceFromIBM = []
    for key in response['entities']:
        # print(key)
        resultFromIBM.append(key['text'])
        relevanceFromIBM.append(key['relevance'])
    arr = [str(r) for r in resultFromIBM]
    return redictedText, arr, relevanceFromIBM
# ParallelDots to take out NER and some more results
import paralleldots
import pandas as pd

# paralleldots.set_api_key("1XLNThDtfEr52Sltn7zDJWShmqJU2NbfHmF5eSyu1Fo")
# f = open("C:\\Users/shiva/Downloads/AI-Sentiment-Analysis/unigram.output.txt", 'w', encoding="ISO-8859-1")

# Taking out sentiment
sent = paralleldots.sentiment(Xtest_text)
resultSent = pd.DataFrame.from_dict(sent)
print("Sentiment of Given Text:")
print(resultSent)
print("\n")

# Taking out NER information
ner = paralleldots.ner(Xtest_text)
resultNER = pd.DataFrame.from_dict(ner)
print("NER Information of Given Text:")
print(resultNER)

# Compute the error. It is slightly different from our model because the internals of
# this process work differently from our implementation.
# fpr, tpr, thresholds = metrics.roc_curve(Ytrain, predictions, pos_label=1)
# print("Multinomial naive bayes AUC: {0}".format(metrics.auc(fpr, tpr)))
"""
print("-----------------------ANALYSIS ON THE TEST DATA ---------------------------")
print("Unigram Model on the Test Data--")
Xtest_uni = uni_vectorizer.transform(Xtest_text)
print("Applying the stochastic descent")
Ytest_uni = stochastic_descent(Xtrain_uni, Ytrain, Xtest_uni)
"""
def extract_info():
    twitter_client = TwitterClient()
    tweet_analyzer = TweetAnalyzer()
    api = twitter_client.get_twitter_client_api()
    tknzr = TweetTokenizer(strip_handles=True, reduce_len=True)
    tweets = api.user_timeline(count=10, lang='en')
    df = tweet_analyzer.tweets_to_data_frame(tweets)
    for i in range(1):
        text = df['text'][i]  # "need food for 6 people at mesra contact no. 9932356934 email = [email protected]"
        temp = ''
        print(paralleldots.abuse(text))
        response1 = paralleldots.abuse(text)
        print(response1)
        # Find intent of the user input
        response2 = paralleldots.intent(text)
        print(response2)
        if response1['sentence_type'] == 'Abusive' or response2['probabilities']['spam/junk'] > 0.5:
            flag = False
            ans = ['0, 0', '0', '0']
            return ans
        else:
            flag = True
        # print(flag)
        if flag:
            flag1 = False
            allsyns1 = set(ss for word in flood_related_words
                           for ss in wordnet.synsets(word))
            allsyns2 = set(ss for word in tknzr.tokenize(text)
                           for ss in wordnet.synsets(word))
            best = max((wordnet.wup_similarity(s1, s2) or 0, s1, s2)
                       for s1, s2 in product(allsyns1, allsyns2))
            print(best)
            if best[0] > 0.6:
                flag1 = True
            if flag1:
                response = paralleldots.ner(text)
                print(response)
                for j in range(len(response['entities'])):
                    if (response['entities'][j]['category'] == 'place'
                            and response['entities'][j]['confidence_score'] > 0.6):
                        print(response['entities'][j]['name'])
                        # get_location(response['entities'][i]['name'])
                category = {
                    "need": ['need', 'require', 'want', 'lack'],
                    "offer": [
                        'send', 'have', 'give', 'donate', 'transfer',
                        'distribute', 'aid', 'help', 'procure'
                    ]
                }
                response = paralleldots.custom_classifier(text, category)
                print(response)
                if response['taxonomy'][0]['confidence_score'] > response['taxonomy'][1]['confidence_score']:
                    temp = "need"
                else:
                    temp = "offer"
                num = get_contact(text, tweets[0])
                if temp == "need":
                    category = {
                        "food": [],
                        "water": [],
                        "shelter": [],
                        "first-aid": [],
                        "help": []
                    }
                    response = paralleldots.custom_classifier(text, category)
                    print(response)
                    # Pick the tag with the highest confidence score.
                    x = 0
                    for j in range(5):
                        if response['taxonomy'][j]['confidence_score'] > x:
                            cat = response['taxonomy'][j]['tag']
                            x = response['taxonomy'][j]['confidence_score']
                else:
                    category = {
                        "food": [],
                        "water": [],
                        "shelter": [],
                        "first-aid": []
                    }
                    response = paralleldots.custom_classifier(text, category)
                    print(response)
                    x = 0
                    for j in range(4):
                        if response['taxonomy'][j]['confidence_score'] > x:
                            cat = response['taxonomy'][j]['tag']
                            x = response['taxonomy'][j]['confidence_score']
                quantity = re.findall(quant_no, text)
                qnt = []
                for j in quantity:
                    if len(j) < 10:
                        qnt.append(j)
                print(qnt)
                s = tweets[0]
                loc1 = False
                if s.coordinates is None:
                    sn = s.user.screen_name
                    m = "@%s Hello! please share your location while tweeting" % (sn)
                    s = api.update_status(m, s.id)
                else:
                    loc1 = True
                ans = []
                if loc1:
                    ans.append(
                        str(tweets[0].coordinates['coordinates'][1]) + ', ' +
                        str(tweets[0].coordinates['coordinates'][0]))
                else:
                    ans.append('0, 0')
                ans.append(num)
                print(len(qnt))
                if len(qnt) > 0:
                    ans.append(qnt[0])
                else:
                    ans.append('0')
                print(ans)
                return ans
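# get_contact() is referenced but not defined in the snippet above. The sketch below is
# a hypothetical version that pulls a 10-digit contact number out of the tweet text with
# a regular expression; the pattern and the '0' fallback are assumptions based on the
# example text in the comment ("contact no. 9932356934").
import re

def get_contact(text, tweet):
    match = re.search(r'\b\d{10}\b', text)
    if match:
        return match.group(0)
    return '0'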
def processDataNow(self):
    print("inside processDataNow")
    mydb = mysql.connector.connect(
        host="cmpe272.ccmlabqieyyi.us-east-1.rds.amazonaws.com",
        user="******",
        passwd="cmpe2722",
        database="cmpe272")
    create_table_statement = "create table if not exists result_table ( data TEXT ,location TEXT , retweet_count int , favorite_count int ,time_zone TEXT , is_sarcastic int, name TEXT, org TEXT, place TEXT);"
    mycursor = mydb.cursor()
    mycursor.execute(create_table_statement)
    truncate_table_statement = "truncate table result_table"
    mycursor.execute(truncate_table_statement)
    model = tensorflow.keras.models.load_model('model/my_model.h5')
    # f = h5py.File('model/my_model.h5', 'r')
    # print(f.attrs.get('keras_version'))
    # print("model loaded")
    # print(model)
    print("model printed")
    # Process the input file
    input_file_path = 'data/input_data/data.json'
    result_file_path = 'data/result_data/result.json'
    # model = load_model('model/my_model.h5')
    with open(input_file_path) as json_file:
        json_array = json.load(json_file)
    # input_file = open(input_file_path)
    # json_array = json.load(input_file)
    store_list = []
    for item in json_array:
        store_details = {
            "text": None,
            "location": None,
            "retweet_count": None,
            "favorite_count": None,
            "time_zone": None,
            "is_sarcastic": None
        }
        store_details['text'] = item['text']
        store_details['location'] = item['location']
        store_details['retweet_count'] = item['retweet_count']
        store_details['favorite_count'] = item['favorite_count']
        store_details['time_zone'] = item['time_zone']
        store_list.append(store_details)
    # print(store_list)
    for i in store_list:
        name = ''
        group = ''
        place = ''
        headline = [i['text']]
        tokenizer = Tokenizer(num_words=2000, split=' ')
        tokenizer.fit_on_texts(headline)
        # headline = tokenizer.fit_on_texts(headline)
        headline = tokenizer.texts_to_sequences(headline)
        headline = pad_sequences(headline, maxlen=29, dtype='int32', value=0)
        sentiment = model.predict(headline, batch_size=1, verbose=2)[0]
        # print(sentiment)
        if np.argmax(sentiment) == 0:
            i['is_sarcastic'] = 0
            print('sarcastic')
        elif np.argmax(sentiment) == 1:
            i['is_sarcastic'] = 1
            print('non-sarcastic')
        nerDict = {"name"}
        import paralleldots
        paralleldots.set_api_key("iNzaBLAI4u60Eu6SULcErgbTf8lFDAj8rJ6rAajgvOw")
        nerDataJson = paralleldots.ner(i['text'])
        nerStr = json.dumps(nerDataJson["entities"])
        nerList = json.loads(nerStr)
        print(nerList)
        for data in nerList:
            if data["category"] == "group":
                group = data["name"]
            if data["category"] == "name":
                name = data["name"]
            if data["category"] == "place":
                place = data["name"]
        insert_sql = "INSERT INTO result_table (data , location ,retweet_count, favorite_count, time_zone, is_sarcastic , name, org, place) VALUES (%s, %s, %s, %s, %s, %s,%s, %s, %s)"
        values = (i['text'], i['location'], i['retweet_count'],
                  i['favorite_count'], i['time_zone'], i['is_sarcastic'], name,
                  group, place)
        mycursor.execute(insert_sql, values)
        mydb.commit()
    with open(result_file_path, 'w') as outfile:
        json.dump(store_list, outfile)
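# Illustrative only: the loop above expects paralleldots.ner() to return a JSON object
# with an "entities" list whose items carry "name" and "category" fields (the categories
# seen across these snippets are "name", "group" and "place", plus a "confidence_score").
# The concrete values below are made up for illustration.
sample_ner_response = {
    "entities": [
        {"name": "Narendra Modi", "category": "name", "confidence_score": 0.95},
        {"name": "India", "category": "place", "confidence_score": 0.90},
    ]
}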
def test(): set_api_key("Put your Api key here") category = { "finance": ["markets", "economy", "shares"], "world politics": ["diplomacy", "UN", "war"], "india": ["congress", "india", "bjp"] } print( similarity("Sachin is the greatest batsman", "Tendulkar is the finest cricketer")) print(sentiment("Come on, lets play together")) print(ner("Narendra Modi is the prime minister of India", "en")) print( taxonomy( "Michael Jordan of the Chicago Bulls is getting a 10-hour Netflix documentary in 2019" )) print( keywords( "Prime Minister Narendra Modi tweeted a link to the speech Human Resource Development Minister Smriti Irani made in the Lok Sabha during the debate on the ongoing JNU row and the suicide of Dalit scholar Rohith Vemula at the Hyderabad Central University." )) print( phrase_extractor( "Prime Minister Narendra Modi tweeted a link to the speech Human Resource Development Minister Smriti Irani made in the Lok Sabha during the debate on the ongoing JNU row and the suicide of Dalit scholar Rohith Vemula at the Hyderabad Central University." )) print( emotion( "Did you hear the latest Porcupine Tree song ? It's rocking !")) print( intent( "Finance ministry calls banks to discuss new facility to drain cash" )) print(abuse("you f**king a$$hole")) print( custom_classifier("Narendra Modi is the prime minister of India", category)) print( batch_intent([ "drugs are fun", "don\'t do drugs, stay in school", "lol you a f*g son", "I have a throat infection" ])) print(batch_abuse(["drugs are fun", "dont do drugs, stay in school"])) print( batch_sentiment([ "drugs are fun", "don\'t do drugs, stay in school", "lol you a f*g son", "I have a throat infection" ])) print( batch_phrase_extractor([ "drugs are fun", "don\'t do drugs, stay in school", "lol you a f*g son", "I have a throat infection" ])) print( batch_taxonomy([ "Michael Jordan of the Chicago Bulls is getting a 10-hour Netflix documentary in 2019", "Michael Jordan of the Chicago Bulls is getting a 10-hour Netflix documentary in 2019" ])) print( batch_ner([ "Michael Jordan of the Chicago Bulls is getting a 10-hour Netflix documentary in 2019", "Michael Jordan of the Chicago Bulls is getting a 10-hour Netflix documentary in 2019" ])) print( batch_emotion([ "drugs are fun", "don\'t do drugs, stay in school", "lol you a f*g son", "I have a throat infection" ])) print(facial_emotion_url("https://i.imgur.com/klb812s.jpg")) print(object_recognizer_url("https://i.imgur.com/klb812s.jpg")) print( sarcasm( "The movie that i watched last night is so funny that i get rolled out with laughter" )) print( batch_sarcasm([ "The movie that i watched last night is so funny that i get rolled out with laughter", "I want to spend my life alone" ]))