def test_keywords_language(self):
    """keywords() with an explicit language= argument returns words drawn
    from the input text and honours top_n / threshold."""
    text = "La semaine suivante, il remporte sa premiere victoire, dans la descente de Val Gardena en Italie, près de cinq ans après la dernière victoire en Coupe du monde d'un Français dans cette discipline, avec le succès de Nicolas Burtin à Kvitfjell"
    words = set(text.lower().split())
    results = keywords(text, language='French')
    # All returned keywords must come from the text itself.
    self.assertTrue(set(results.keys()).issubset(words))
    results = keywords(text, top_n=3)
    # Bug fix: `len(results) is 3` relied on CPython small-int identity
    # caching; compare values with == instead.
    assert len(results) == 3
    results = keywords(text, threshold=.1)
    # Every surviving score must respect the threshold.
    for v in results.values():
        assert v >= .1
def test_keywords(self):
    """Single-document keywords(): relevance ordering, top_n, threshold."""
    text = "A working api is key to the success of our young company"
    words = set(text.lower().split())
    results = keywords(text)
    sorted_results = sorted(results.keys(), key=lambda x: results.get(x), reverse=True)
    # 'api' should rank among the three most relevant words.
    assert 'api' in sorted_results[:3]
    self.assertTrue(set(results.keys()).issubset(words))
    results = keywords(text, top_n=3)
    # Bug fix: use ==, not `is` identity, to compare an int result.
    assert len(results) == 3
    results = keywords(text, threshold=.1)
    for v in results.values():
        assert v >= .1
def test_keywords_language_detect(self):
    """keywords() with language='detect' auto-detects French input."""
    text = "il a remporté sa première victoire dans la descente de Val Gardena en Italie"
    words = set(text.lower().split())
    results = keywords(text, language='detect')
    # On Python 2 the API returns unicode keys; encode before comparing.
    result_keys = results.keys() if PY3 else map(
        lambda x: x.encode("utf-8"), results.keys())
    self.assertTrue(set(result_keys).issubset(words))
    results = keywords(text, top_n=3)
    # Bug fix: use ==, not `is` identity, to compare an int result.
    assert len(results) == 3
    results = keywords(text, threshold=.1)
    for v in results.values():
        assert v >= .1
def identify_keywords(text):
    """Combine indico text tags and keywords for `text` into one dict."""
    # Strip non-ASCII characters before calling the API.
    text = text.encode("ascii", "ignore")
    print len(text)  # debug output: size of the cleaned text (Python 2)
    x = indicoio.text_tags(text, threshold = 0.01, top_n = NUM_RESULTS)
    y = indicoio.keywords(text, threshold = 0.01, top_n = NUM_RESULTS)
    # Merge keyword scores into the tag dict; keyword entries win on
    # key collisions.
    x.update(y)
    return x
def getSentiment(jsonInput, word):
    """Aggregate indico sentiment statistics for jsonInput[0][0], persist
    them as a BuzzfeedSearch row, and build a JSON string by hand.

    NOTE(review): structure reconstructed from flattened source. As
    written, the success path falls off the end (returns None) and only
    the error path returns jsonStr -- presumably a final `return jsonStr`
    was intended; verify against the original file.
    """
    jsonStr = ""
    try:
        sentiments = indicoio.sentiment(jsonInput[0][0])
        keywords = indicoio.keywords(jsonInput[0][0])
        average =0
        above_average = 0
        below_average =0
        # Tally scores on either side of the 0.5 neutral line.
        for sentiment in sentiments:
            average+= sentiment
            if (sentiment > 0.5) :
                above_average = above_average+1
            else:
                below_average=below_average+1
        average = average/len(sentiments)
        above_average = float(above_average)/len(sentiments)
        below_average= float(below_average)/len(sentiments)
        most_frequent_words =getFrequentWords(jsonInput)
        # JSON payload is assembled by string concatenation, not
        # json.dumps, so no escaping is performed.
        jsonStr = "{\"results\":{\"above_average\":\""+str(above_average)+"\", \"word\":\""+word+"\",\"below_average\" :\""+str(below_average)+"\",\"average\":"+str(average)+"}, \"keywords\": \""+str(keywords)+"\", \"most_frequent_word\":\""
        # Append every frequent word after the first, comma separated.
        for i in most_frequent_words[1:len(most_frequent_words)]:
            print(i.getKey())
            jsonStr+=i.getKey()+","
        jsonStr+= "\"}"
        result = BuzzfeedSearch(json=jsonStr, name=word)
        result.save()
        serializer = BuzzfeedSerializer(result)
        content = JSONRenderer().render(serializer.data)
        all_entries = BuzzfeedSearch.objects.all()
    # Python 2 except syntax; any failure silently returns the partial
    # string built so far.
    except Exception,e:
        return jsonStr
def search(self, query):
    """Match `query` keywords against the KEYWORDS catalogue and respond
    with up to five best element matches (Python 2: xrange)."""
    numbers = filter_numbers(query.lower())
    # Query terms = extracted numbers plus indico keywords, each mapped
    # through SYMBOL_MAPPING when a mapping exists.
    keywords = [numbers] + [
        SYMBOL_MAPPING.get(key, key)
        for key in indicoio.keywords(
            " ".join(map(preformat, query.lower().split(" "))),
            top_n=100).keys()
    ]
    keywords = " ".join(keywords)
    best = 0
    results = []
    for key in KEYWORDS:
        # ratio = fuzz.token_set_ratio(keywords, key)
        ratio = 0
        # Score = number of query words occurring verbatim in this key.
        for word in keywords.split(" "):
            keys = key.split(" ")
            if word in keys:
                ratio += 1
        if ratio > best:
            # New best score: restart the result list.
            best = ratio
            results = []
            results.append((ratio, key, data.ELEMENT_DATA[key]))
        elif ratio == best:
            results.append((ratio, key, data.ELEMENT_DATA[key]))
    # Too many ties: re-rank the tied entries by fuzzy string ratio.
    if len(results) > 3:
        for i in xrange(len(results)):
            results[i] = fuzz.ratio(keywords, results[i][1]), results[i][1], results[i][2]
    self.respond([{result[1]: result[2]} for result in sorted(results, reverse=True)[:5]])
def analysis(data):
    """Print a full indico text-analysis report for `data` (Python 2).

    NOTE(review): `sort` is an external helper -- presumably it orders
    (label, score) pairs; confirm against its definition. `names` is
    computed but never printed.
    """
    sentiment = ind.sentiment_hq(data)
    tags = sort(ind.text_tags(data))
    languages = sort(ind.language(data))
    politics = sort(ind.political(data))
    keywords = sort(ind.keywords(data))
    names = sort(ind.named_entities(data))
    print "Sentiment", sentiment
    print "\n\n\nTags"
    # Each section prints "label  percentage" per entry.
    for t in tags:
        print t[0], float(t[1]) * 100
    print "\n\n\nLanguages"
    for l in languages:
        print l[0], float(l[1]) * 100
    print "\n\n\nPolitical"
    for p in politics:
        print p[0], float(p[1]) * 100
    print "\n\nkeywords"
    for k in keywords:
        print k[0], float(k[1]) * 100
def test_batch_keywords_v2(self):
    """Batch v2 keywords: list response whose keys appear in the source."""
    docs = ["A working api is key to the success of our young company"]
    result = keywords(docs, api_key=self.api_key, version=2)
    self.assertTrue(isinstance(result, list))
    # Every extracted keyword must be a substring of its source document.
    self.assertTrue(all(k in docs[0] for k in result[0].keys()))
def test_batch_keywords(self):
    """Batch keywords: one dict per document, keys drawn from its words."""
    docs = ["A working api is key to the success of our young company"]
    vocab = [set(doc.lower().split()) for doc in docs]
    result = keywords(docs, api_key=self.api_key)
    self.assertTrue(isinstance(result, list))
    self.assertTrue(set(result[0].keys()).issubset(vocab[0]))
def getResult(strArray):
    """Run four indico analyses on `strArray` and bundle them in a dict.

    Returns a dict with keys 'sentiment', 'personality', 'political',
    and 'keywords'.
    """
    sent = indicoio.sentiment(strArray)
    pers = indicoio.personality(strArray)
    poli = indicoio.political(strArray)
    keyw = indicoio.keywords(strArray)
    # Bug fix: the "political" entry previously held the literal 4098
    # while the computed `poli` result went unused.
    return {
        "sentiment": sent,
        "personality": pers,
        "political": poli,
        "keywords": keyw,
    }
def test_keywords(self):
    """Single-document keywords(): relevance ordering, top_n, threshold."""
    text = "A working api is key to the success of our young company"
    words = set(text.lower().split())
    results = keywords(text)
    sorted_results = sorted(results.keys(), key=lambda x: results.get(x), reverse=True)
    # 'api' should rank among the three most relevant words.
    assert 'api' in sorted_results[:3]
    self.assertTrue(set(results.keys()).issubset(words))
    results = keywords(text, top_n=3)
    # Bug fix: use ==, not `is` identity, to compare an int result.
    assert len(results) == 3
    results = keywords(text, threshold=.1)
    for v in results.values():
        assert v >= .1
def get_keywords_results(tweet_text_array):
    """Return the keyword strings for each tweet, scores discarded.

    One inner list per tweet, mirroring the order of the input batch.
    """
    scored_batch = indicoio.keywords(tweet_text_array)
    return [list(per_tweet.keys()) for per_tweet in scored_batch]
def test_keywords_language_detect(self):
    """keywords() with language='detect' auto-detects French input."""
    text = "il a remporté sa première victoire dans la descente de Val Gardena en Italie"
    words = set(text.lower().split())
    results = keywords(text, language='detect')
    # On Python 2 the API returns unicode keys; encode before comparing.
    result_keys = results.keys() if PY3 else map(
        lambda x: x.encode("utf-8"), results.keys())
    self.assertTrue(set(result_keys).issubset(words))
    results = keywords(text, top_n=3)
    # Bug fix: use ==, not `is` identity, to compare an int result.
    assert len(results) == 3
    results = keywords(text, threshold=.1)
    for v in results.values():
        assert v >= .1
def quiz_sentence(sentence):
    """Blank out every occurrence of the top keyword of `sentence`.

    Returns the sentence with the highest-scoring keyword replaced by
    "________" (case-insensitive match).
    """
    keywords = indicoio.keywords(sentence)
    top_key = max(keywords, key=keywords.get)
    words = sentence.split()
    # Bug fix: the original iterated range(len(words) - 1), so a keyword
    # in final position was never blanked.
    for i, word in enumerate(words):
        if word.lower() == top_key.lower():
            words[i] = "________"
    return " ".join(words)
def score(self, slide_length, window_length, AItype='tags'):
    """Run the chosen indico analysis over every parsed window string.

    AItype selects 'tags', 'keywords', or 'names'; results are stored
    in self.scores under the same key.

    Raises:
        Exception: if AItype is not one of the three supported values.
    """
    self.parse(slide_length, window_length)
    if AItype == 'tags':
        self.scores['tags'] = [indicoio.text_tags(i) for i in self.strings]
    elif AItype == 'keywords':
        self.scores['keywords'] = [indicoio.keywords(i) for i in self.strings]
    elif AItype == 'names':
        self.scores['names'] = [indicoio.named_entities(i) for i in self.strings]
    else:
        # Bug fix: the message referenced the undefined name `category`,
        # so invalid input raised NameError instead of this Exception.
        raise Exception('Warning: {} not a valid category'.format(AItype))
def parse(message, number):
    """Route an incoming message: store it, then either acknowledge a
    repeat or match an entity and open a ticket (Python 2 prints)."""
    store(message, number)
    userProf = analyzeUser(number)  # NOTE(review): computed but unused here
    if comparePrev(message, number):
        return "Message Sent"
    else:
        ent = entityMatch(message)
        # entityMatch signals "no match" with the string "None".
        if ent == "None":
            # Debug dump of the indico analyses for unmatched messages.
            print "keywords"
            print indicoio.keywords(message, version=2)
            print "tags"
            print indicoio.text_tags(message, threshold = .03)
            print "relevance"
            print indicoio.relevance("Renowned soccer legend Pele will be visiting...", ["Germany", "relocation", "Food", "Safety", "Family", "Transportation", "clothing"])
        else:
            # NOTE(review): bare string expression has no effect --
            # probably intended to be a print statement.
            "Found Entity, directing there"
            ticketCreate(message, number, ent)
def add_entry(text, indicoio_api_key):
    """Append a journal entry (UTC timestamp + indico keywords for
    `text`) to journal.json, creating the file if needed.

    Parameters:
        text: the journal text to analyze.
        indicoio_api_key: API key installed into indicoio.config.
    """
    indicoio.config.api_key = indicoio_api_key
    try:
        # Context manager closes the handle; the original leaked it.
        with open("journal.json", "r") as journal:
            entries = json.load(journal)
    # Bug fix: a missing file raises FileNotFoundError (previously
    # uncaught); corrupt content still raises JSONDecodeError.
    except (FileNotFoundError, json.JSONDecodeError):
        entries = []
    entries.append({
        # Bug fix: datetime objects are not JSON serializable, so
        # json.dumps raised TypeError; store an ISO-8601 string instead.
        'date': datetime.utcnow().isoformat(),
        'keywords': indicoio.keywords(text),
    })
    with open("journal.json", "w") as journal:
        json.dump(entries, journal)
def scraper(request, logger):
    """Scrape the page at ?url=... and map its indico keywords onto a
    hard-coded sector vocabulary; returns {"sectors": [...]}.

    NOTE(review): loop nesting reconstructed from flattened source --
    the sector scan appears to run once per extracted keyword.
    """
    request_data = request.args.get('url')
    logger.debug("received {0}".format(request_data))
    url = request.args.get('url')
    url = urllib.unquote(url)  # Python 2 urllib
    # Spoof a desktop browser UA so sites serve the full page.
    headers = {
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36"
    }
    logger.debug("url {0}".format(url))
    r = requests.get(url, headers=headers)
    tree = lxml.html.fromstring(r.text)
    # Flatten the visible body text onto one line.
    data = tree.xpath("//body")[0].text_content()
    data = ''.join(data.split('\n'))
    ml_sectors = "ML Machine Learning AI Computer Vision Bots NML Anomaly Detection artificial intelligence"
    # sector name -> space-separated trigger vocabulary.
    sectors_list = {
        "iot": "IoT",
        "ai": ml_sectors,
        "machine learning": ml_sectors,
        "telecom": "Telecom Telekom Phone",
        "fintech": "Payments FinTech Wallet P2P Bitcoin",
        "transportation": "Drive Cars Autonomous Taxi Bus Train Rail",
        "cyber": "Cyber Security intelligence Anomaly Detection Ad Fruad Ransom Virus Defense Attack",
    }
    logger.debug(u"data: {0}".format(data))
    sectors = []
    keywords = []
    analysis = indicoio.keywords(data, version=2)
    for keyword in analysis:
        keywords.append(keyword)
        # A keyword that appears in a sector's vocabulary (substring,
        # case-insensitive) tags that sector once.
        for sector in sectors_list:
            if keyword.lower() in sectors_list[sector].lower():
                if not sector in sectors:
                    sectors.append(sector)
    return {"sectors": sectors}
def parse_physical_constants(data):
    """Extract quantity names from data["constant"] records and attach
    indico keywords to each record.

    Returns the (mutated) constant dicts, each gaining a 'Keywords'
    list; the "Quantity " field is popped off in the process.
    """
    quantities = []
    elements = []
    for element in data["constant"]:
        if not element:
            continue  # skip empty records
        # NOTE: the trailing space in "Quantity " matches the source data.
        quantities.append(element.pop("Quantity "))
        elements.append(element)
    # One batched API call: a list of keyword dicts, one per quantity.
    list_keywords = indicoio.keywords(quantities, top_n=10)
    for i, element in enumerate(elements):
        # Bug fix: materialize the keys -- on Python 3, dict.keys() is a
        # live view that is neither indexable nor JSON serializable.
        element['Keywords'] = list(list_keywords[i].keys())
    return elements
def generate_keywords(self):
    """Collect tweet keywords plus place/people/organization mentions
    into one flat list (Python 2: iteritems)."""
    # get keywords
    keywords_dict = indicoio.keywords(self.tweet)
    all_else_dict = indicoio.analyze_text(
        self.tweet, apis=['places', 'people', 'organizations'])
    # store identified keywords
    keywords = []
    # Keep only keywords scoring above the 0.2 relevance cut-off.
    for key, value in keywords_dict.iteritems():
        if value > 0.2:
            keywords.append(key)
    # Each entity API returns a list of dicts; harvest every 'text' field.
    for key, value in all_else_dict.iteritems():
        for list_item in value:
            for key_in, value_in in list_item.iteritems():
                if key_in == 'text':
                    keywords.append(value_in)
    return keywords
def indicoioSent(request):
    """Tag every Post with a comma-joined string of its indico keywords
    and return all posts serialized as JSON (Django view).

    The sentiment-scoring code is retained but commented out below.
    """
    allentries = Post.objects.all()
    one_entry = Post.objects.get(pk=1)
    # one_entry.statement = "Someone please fix the traffic here in Austin"
    # one_entry.save()
    counter = 1  # NOTE(review): leftover from the commented-out loop body
    # parseTags = ""
    # one_entry.tags = indicoio.keywords(one_entry.statement)
    # for key in one_entry.tags:
    #     parseTags+= key+", "
    # one_entry.tags = parseTags
    # one_entry.save()
    for one_entry in allentries:
        # #i = Post.objects.get(pk=counter)
        # one_entry.value = indicoio.sentiment_hq(one_entry.statement)
        # #add POS / NEG to sentiment field rather than percent numbers
        # if (one_entry.value > .7):
        #     one_entry.sentiment = "POS"
        # elif(one_entry.value < .3):
        #     one_entry.sentiment = "NEG"
        # else:
        #     one_entry.sentiment = "NEU"
        parseTags = ""
        # Replace the keyword-score dict with a "k1, k2, " string of keys.
        one_entry.tags = indicoio.keywords(one_entry.statement)
        for key in one_entry.tags:
            parseTags+= key+", "
        one_entry.tags = parseTags
        one_entry.save()
    # counter += 1
    allentries = serialize('json', Post.objects.all())
    return HttpResponse(allentries)
def extract_keyword(text):
    """
    Ask INDICO for the keyword in text.

    Returns the most important word in text, or None if there is none.
    Calls .lower() on the word.
    """
    response = indicoio.keywords(text, version=2, top_n=1)
    found = list(response.keys())
    # Exactly one keyword expected; anything else maps to None.
    return found[0].lower() if len(found) == 1 else None
def test_batch_keywords(self):
    """Batch keywords: one dict per document, keys drawn from its words."""
    docs = ["A working api is key to the success of our young company"]
    vocab = [set(doc.lower().split()) for doc in docs]
    result = keywords(docs, api_key=self.api_key)
    self.assertTrue(isinstance(result, list))
    self.assertTrue(set(result[0].keys()).issubset(vocab[0]))
def getKeywords(self, text):
    """Return indico keyword scores for `text`, or None on any failure."""
    try:
        return indicoio.keywords(text)
    except Exception:
        # Bug fix: the bare `except:` also swallowed SystemExit and
        # KeyboardInterrupt; catch only genuine errors.
        return None
def keywords_extraction_relative(self, text):
    """Store relative-scored keywords covering every word of `text`."""
    word_count = len(text.split())
    # top_n equal to the word count asks for a score per word.
    self.keywords_relative = keywords(
        text, top_n=word_count, threshold=0, relative=True)
from lxml import html
import requests
import indicoio

indicoio.config.api_key = '246290703649a7500961ea78369dbce8'

# Fetch the soylent.com home page and run keyword extraction on the raw
# HTML bytes (Python 2 print statements).
page = requests.get('https://www.soylent.com/')
content = page.content
print indicoio.keywords(content)
print content
def test_keywords_v2(self):
    """v2 keywords on one string: dict whose keys are text substrings."""
    sample = "A working api is key to the success of our young company"
    result = keywords(sample, version=2)
    self.assertTrue(isinstance(result, dict))
    self.assertTrue(all(k in sample for k in result.keys()))
import indicoio

indicoio.config.api_key = '6f3382489ab2469dbf8ca6023e91eabb'

# single example: top two keywords with relative scoring
result = indicoio.keywords("I miss daddy so much", api_key=indicoio.config.api_key , version=2 , top_n= 2 , relative=True);

# Render the scores as one "key:score,key:score," string.
resultString = "";
for keys in result:
    resultString = resultString + keys + ":" + str(result[keys]) + ","
print(resultString)

# batch example
#indicoio.emotion([
#    "I did it. I got into Grad School. Not just any program, but a GREAT program. :-)",
#    "Like seriously my life is bleak, I have been unemployed for almost a year."
#])
# NOTE(review): chunk of a larger loop -- `i`, `bin_size`, `count`,
# `content`, `news_data`, and `all_keyword_list` are defined above this
# view of the file.
for j in range(bin_size):
    # Concatenate every non-NaN text field of the current article.
    if str(news_data.iloc[count]['headline']) != "nan":
        content += str(news_data.iloc[count]['headline'] + "\n")
    if str(news_data.iloc[count]['abstract']) != "nan":
        content += str(news_data.iloc[count]['abstract'] + "\n")
    if str(news_data.iloc[count]['snippet']) != "nan":
        content += str(news_data.iloc[count]['snippet'] + "\n")
    if str(news_data.iloc[count]['lead_paragraph']) != "nan":
        content += str(news_data.iloc[count]['lead_paragraph'] + "\n")
    count += 1
# Keep only letters, spaces, and newlines.
content = re.sub("[^a-zA-Z \n]+", " ", content)
tb = TextBlob(content)
keywords = indicoio.keywords(content, version=2, top_n=50)
# Sort keyword/score pairs by descending score.
keyword_list = sorted(keywords.items(), key=operator.itemgetter(1), reverse=True)
# clean the words
new_keyword_list = []
for keyword in keyword_list:
    # ASCII-fold the keyword and attach its in-text word frequency.
    clean_keyword = unicodedata.normalize('NFKD', keyword[0]).encode(
        'ascii', 'ignore')
    new_keyword_list.append(
        (clean_keyword, keyword[1], tb.words.count(keyword[0])))
all_keyword_list.append(new_keyword_list)
print "finsih adding bin number: ", i
def test_batch_keywords_v2(self):
    """Batch v2 keywords: list response whose keys appear in the source."""
    docs = ["A working api is key to the success of our young company"]
    result = keywords(docs, api_key=self.api_key, version=2)
    self.assertTrue(isinstance(result, list))
    # Every extracted keyword must be a substring of its source document.
    self.assertTrue(all(k in docs[0] for k in result[0].keys()))
def test_keywords_v2(self):
    """v2 keywords with explicit api_key: dict of substrings of the text."""
    sample = "A working api is key to the success of our young company"
    result = keywords(sample, api_key=self.api_key, version=2)
    self.assertTrue(isinstance(result, dict))
    self.assertTrue(all(k in sample for k in result.keys()))
def indicoKeywords(tweet):
    """Return the single highest-scoring keyword of `tweet` as a list."""
    scores = indicoio.keywords(tweet)
    ranked = sorted(scores, key=scores.get, reverse=True)
    return ranked[:1]
def execute(USERNAME, target, refresh):
    """Run the full indico personality/sentiment report for a user's
    raw text and write it to the per-user output file.

    NOTE(review): reconstructed from flattened source; the `refresh`
    parameter is unused in this view of the code.
    """
    r_data = io_helper.read_raw(USERNAME, target)
    og = sys.stdout  # remember the real stdout so it can be restored
    fpath = io_helper.out_path(USERNAME, target)

    def analysis(raw='', limit=5, text='', percent=True):
        """Pretty-print a result payload (list or dict of scores)."""
        global meta_dict
        # print lines if input is a list of non-dicts
        # if input is list of dicts, merge dicts and resend to analysis
        if isinstance(raw, list):
            for item in raw:
                if not isinstance(item, dict):
                    print(item)
                else:
                    create_meta_dict(item)
            analysis(meta_dict, limit, text, percent)
        # if input is dict: print k, v pairs
        # optional args for return limit and description text
        if isinstance(raw, dict):
            print(text)
            ct = 0
            for v in sorted(raw, key=raw.get, reverse=True):
                ct += 1
                if ct > limit:
                    break
                if isinstance(raw[v], float):
                    if percent:
                        per = r'%'
                    else:
                        per = ''
                    print(" " + v, str(round(raw[v] * 100, 2)) + per)
                else:
                    print(v, raw[v])
            print()

    def create_meta_dict(item):
        # merge list of dicts into master dict
        global meta_dict
        meta_dict[item['text']] = item['confidence']
        return meta_dict

    # Strip escape artifacts from the raw text.
    # NOTE(review): nesting of the inner `continue` check reconstructed
    # from flattened source -- confirm against the original file.
    rClean = ''
    for i in range(len(r_data)):
        if r_data[i - 1] == '\\':
            rClean = rClean[:-1]
            if r_data[i] != "'":
                continue
        if r_data[i] == '*':
            rClean += ' '
        else:
            rClean += r_data[i]
    r_data = rClean
    del rClean
    indicoio.config.api_key = keycheck.get_key()
    # Big 5
    big5 = {
        'text': "Big 5 personality inventory matches: ",
        "payload": indicoio.personality(r_data)
    }
    # Meyers briggs
    mbtiLabels = indicoio.personas(r_data)
    # persona label -> four-letter MBTI code.
    mbti_dict = {
        'architect': 'intj',
        'logician': 'intp',
        'commander': 'entj',
        'debater': 'entp',
        'advocate': 'infj',
        'mediator': 'infp',
        'protagonist': 'enfj',
        'campaigner': 'enfp',
        'logistician': 'istj',
        'defender': 'isfj',
        'executive': 'estj',
        'consul': 'esfj',
        'virtuoso': 'istp',
        'adventurer': 'isfp',
        'entrepreneur': 'estp',
        'entertainer': 'esfp'
    }

    def replace_mbti():
        # Translate persona labels to their MBTI codes.
        for k, v in mbtiLabels.items():
            k = k.replace(k, mbti_dict[k])
            yield k

    k = (list(replace_mbti()))
    v = map(lambda x: x, mbtiLabels.values())
    payload = (dict(zip(k, v)))
    # NOTE(review): the original string contained a mangled control
    # character here; reconstructed as a newline escape.
    mbti = {
        'text': "Most likely personalilty styles: \n",
        "payload": payload,
        'ct': 5,
        'percent': True
    }
    # Political
    pol = {
        'text': "Political alignments: ",
        "payload": indicoio.political(r_data, version=1)
    }
    # Sentiment
    sen = {
        'text': "Sentiment: ",
        "payload": {
            'Percent positive': indicoio.sentiment(r_data)
        },
        'ct': 3
    }
    # Emotion
    emo = {
        'text': "Predominant emotions:",
        "payload": indicoio.emotion(r_data),
        'ct': 5
    }
    # Keywords
    kw = {'text': "Keywords: ", "payload": indicoio.keywords(r_data), 'ct': 5}
    # Text tags
    tt = {
        'text': "Text tags: ",
        "payload": indicoio.text_tags(r_data),
        'ct': 10
    }
    # Place
    pla = {
        'text': "Key locations: ",
        'payload': indicoio.places(r_data, version=2),
        'ct': 3,
        'percent': True
    }

    def Karma(USERNAME):
        """Print the user's top-10 karma-by-subreddit breakdown."""
        import praw
        import collections
        kList = []
        user_agent = ("N2ITN")
        r = praw.Reddit(user_agent=user_agent)
        thing_limit = 100
        user = r.get_redditor(USERNAME)
        gen = user.get_submitted(limit=thing_limit)
        karma_by_subreddit = {}
        for thing in gen:
            subreddit = thing.subreddit.display_name
            karma_by_subreddit[subreddit] = (
                karma_by_subreddit.get(subreddit, 0) + thing.score)
        for w in sorted(karma_by_subreddit, key=karma_by_subreddit.get, reverse=True):
            kList.append(str(w) + ': ' + str(karma_by_subreddit[w]))
        kList.insert(0, 'Karma by Sub')
        print("\n\t".join(kList[:10]))

    def show(results):
        # Accepts bag of dicts, or single dict
        if not isinstance(results, dict):
            for X in results:
                show(X)
        else:
            if results == pla and pla['payload'] == []:
                print("Not enough information to infer place of origin")
                print()
            else:
                i = results
                analysis(
                    raw=i.get('payload', ''),
                    limit=i.get('ct', 5),
                    text=i.get('text', ''),
                    percent=i.get('percent', True))

    # Redirect stdout into the report file while printing all sections.
    with open(fpath, 'w') as outtie:
        sys.stdout = outtie
        print(target + USERNAME)
        print()
        show([kw, pla, big5, emo, sen, pol, mbti, tt])
        Karma(USERNAME)
        sys.stdout = og
    return
def findKeywords(inputString, top_n=5):
    '''returns the keywords of a sentence in a list'''
    # list() over the score dict yields just the keyword strings.
    return list(indicoio.keywords(inputString, top_n=top_n))
def analyzer(job_desc):
    """Keywords of a job description scoring above 0.15, as a list."""
    scored = indicoio.keywords(job_desc, threshold=0.15)
    return list(scored.keys())
# NOTE(review): the next two statements are the tail of a definition
# (presumably getKeyAtIndex) that starts above this view of the file.
keys = list(dic.keys())
return keys[index]

def getArgMax(dic, rnd=0):
    """Return the (key, value) pair with the largest value in `dic`.

    If `rnd` is truthy the value is rounded to two decimal places.
    """
    argMaxIndex = np.argmax(list(dic.values()))
    value = getValueAtIndex(dic, argMaxIndex)
    key = getKeyAtIndex(dic, argMaxIndex)
    if rnd:
        value = round(value, 2)
    return (key, value)

indicoio.config.api_key = 'a840148f0c38c41d3f70b43914656798'

text = "We're supposed to get up to 24 inches in the storm"

# single example
emotions = indicoio.emotion(text)
keywords = indicoio.keywords(text, version=2, relative=True)

print("Text: ", text)
print("Emotion: ", getArgMax(emotions, rnd=2))
# Round every keyword score to two decimals for display.
print("Keyword: ", tuple((key, round(value, 2)) for key, value in list(keywords.items())))
# Build a pairwise-similarity matrix over `size` chunks of NYT.txt,
# comparing indico keywords, tags, and named entities (Python 2).
# NOTE(review): `size` and the compare* helpers are defined outside
# this view of the file.
fd = open("NYT.txt", "r")
string1 = fd.read()
#fd = open("BBC.txt", "r")
#string1 += fd.read()

# Normalize UTF-8 curly quotes/apostrophes to ASCII equivalents.
string1 = string1.replace("\xe2\x80\x9c", "\"")
string1 = string1.replace("\xe2\x80\x9d", "\"")
string1 = string1.replace("\xe2\x80\x99", "\'")

keywordList = []
tagList = []
entityList = []
# Split the corpus on the first `size` newlines.
myList = string1.split("\n", size)
for x in range(0, size):
    keywordList.append(indicoio.keywords(myList[x], top_n=10, independent=True))
    tagList.append(indicoio.text_tags(myList[x], threshold=.05))
    entityList.append(indicoio.named_entities(myList[x]))
    #print indicoio.text_tags(myList[x], threshold=.1)
    #print indicoio.keywords(myList[x], top_n=6, independent=True)

## build 2-d array of weights
matrix = [[0 for x in range(size)] for x in range(size)]
for x in range(0, size):
    for y in range(0, size):
        # Combined weight = product of the three similarity measures.
        matrix[x][y] = 1000 * compareKeywords(
            keywordList[x], keywordList[y]) * compareTags(
            tagList[x], tagList[y]) * compareEntities(
            entityList[x], entityList[y])
        #print str(x) + " " + str(y) + " " + str(matrix[x][y])
# NOTE(review): chunk of a larger loop -- `i`, `bin_size`, `count`,
# `content`, `news_data`, and `all_keyword_list` are defined above this
# view of the file.
for j in range(bin_size):
    # Concatenate every non-NaN text field of the current article.
    if str(news_data.iloc[count]['headline']) != "nan":
        content += str(news_data.iloc[count]['headline'] + "\n")
    if str(news_data.iloc[count]['abstract']) != "nan":
        content += str(news_data.iloc[count]['abstract'] + "\n")
    if str(news_data.iloc[count]['snippet']) != "nan":
        content += str(news_data.iloc[count]['snippet'] + "\n")
    if str(news_data.iloc[count]['lead_paragraph']) != "nan":
        content += str(news_data.iloc[count]['lead_paragraph'] + "\n")
    count += 1
# Keep only letters, spaces, and newlines.
content = re.sub("[^a-zA-Z \n]+", " ", content)
tb = TextBlob(content)
keywords = indicoio.keywords(content, version=2 , top_n = 50)
# Sort keyword/score pairs by descending score.
keyword_list = sorted(keywords.items(), key=operator.itemgetter(1),reverse=True)
# clean the words
new_keyword_list = []
for keyword in keyword_list:
    # ASCII-fold the keyword and attach its in-text word frequency.
    clean_keyword = unicodedata.normalize('NFKD', keyword[0]).encode('ascii','ignore')
    new_keyword_list.append((clean_keyword, keyword[1], tb.words.count(keyword[0])))
all_keyword_list.append(new_keyword_list)
print "finsih adding bin number: ", i

df = pd.DataFrame(all_keyword_list)
# save the result into the file in every loop
with open('test3.csv', 'a') as f:
    df.to_csv(f, header=False)
def get_keywords(data):
    """Returns all keywords in an article along with a confidence score"""
    set_api_key()
    if not data:
        # Empty/missing article: implicit None, as in the original.
        return None
    return indicoio.keywords(data, threshold=0.15)
#with open('textfile.txt', 'r') as myfile: # data = myfile.read().replace('\n', '') #print(data) import os import indicoio # reads from the file which contains the audio to speech content __location__ = os.path.realpath( os.path.join(os.getcwd(), os.path.dirname(__file__))) file_contents = open(os.path.join(__location__, "textfile.txt")) text = file_contents.read() # next, feed it into the ML API indicoio.config.api_key = 'd08fbca96c4341957f0a8a0b21d08b5d' print("Political Allegiance: ") print(indicoio.political(text)) print("\n") print("Key Words: ") print(indicoio.keywords(text, version=2)) print("\n") print("Important Persons: ") print(indicoio.people(text)) print("\n") print("Significant Locations: ") print(indicoio.places(text)) print("\n") print("Relevant Organizations: ") print(indicoio.organizations(text))
def get_keywords():
    """Flask endpoint: return the top keywords of POSTed text as JSON.

    Expects a form field 'data_to_analyze'; responds with
    {"keywords": [...]} for the first analyzed item.
    """
    if request.method == 'POST':
        data = dict(request.form)['data_to_analyze']
        result = indicoio.keywords(data, top_n=15, threshold=0.3, relative=True)
        # Bug fix: on Python 3 dict.keys() is a view object that
        # json.dumps cannot serialize; materialize it to a list.
        return json.dumps({
            'keywords': list(result[0].keys())
        })
def keywords_extraction(self, text):
    """Store a keyword score for every word of `text` (no threshold)."""
    word_count = len(text.split())
    self.keywords = keywords(text, top_n=word_count, threshold=0)
def fetch_keywords(self):
    """Keep the five strongest v2 keywords of self.data as a set."""
    scored = indicoio.keywords(self.data, top_n=5, version=2)
    top_five = sorted(scored, key=scored.get, reverse=True)[:5]
    self.final_json['keywords'] = set(top_five)
    print("\nwords: ", self.final_json['keywords'])
# -*- coding: utf-8 -*-
# @Author: Abhi
# @Date: 2018-05-22 18:07:24
# @Last Modified by: Abhi
# @Last Modified time: 2018-05-22 18:26:55
import indicoio as io
from collections import Counter

io.config.api_key = "9bc524ad52580fbbc308b2b136777ef9"

# Score the first 30 unique crimes by the sentiment of their two most
# prominent keywords, then print them from most to least negative.
rates = []
# Bug fixes: a context manager now closes the file handle (previously
# leaked), and the handle no longer shadows the builtin name `file`.
# Iterating the handle also avoids reading the whole file into memory.
with open("unique-crimes.txt", "r") as crimes_file:
    for i, crime in enumerate(crimes_file, start=1):
        keywords = Counter(io.keywords(crime, version=2))
        top_words = keywords.most_common(2)
        keyword = " ".join(x[0] for x in top_words)
        rates += [(crime.strip(), io.sentiment_hq(keyword))]
        # print(keywords)
        # print(keyword)
        # print(io.sentiment_hq(keyword))
        # print()
        if i == 30:
            break

# Most negative crimes first (ascending sentiment score).
for rate in sorted(rates, key=lambda rate: rate[1]):
    print(rate)
# NOTE(review): chunk of a larger script -- `text`, `maxj`, `maxsenti`,
# `i`, the counters, and `keyreview` are defined above this view.
dict = json.loads(text)  # NOTE(review): shadows the builtin `dict`
asin = dict[maxj[i]]["asin"]
reviewText = dict[maxj[i]]["reviewText"]
overall = dict[maxj[i]]["overall"]
reviewTime = dict[maxj[i]]["reviewTime"]
# Collect the selected review's fields plus its sentiment score.
reviewadd = [asin, reviewText, overall, reviewTime, maxsenti[i]]
negativereviewinfile.append(reviewadd)
json.dump(negativereviewinfile, open('negativereview.json', 'wb'))

#keyvalue = [averagerate, averagesentiment, num, positivenum, middlenum, negativenum]
# Summary statistics serialized in a fixed order.
keyvalue = []
keyvalue.append(averagerate)
keyvalue.append(averagesentiment)
keyvalue.append(num)
keyvalue.append(positivenum)
keyvalue.append(middlenum)
keyvalue.append(negativenum)
json.dump(keyvalue, open('keyvalue.json', 'wb'))

#print maxi
#print maxj
#print maxsenti
#print positivenum
#print negativenum
#print num
# Extract and persist keywords for the selected review text.
keyw = keywords(keyreview)
json.dump(keyw, open('keyw.json', 'wb'))
These key-value pairs represent the likelihood that each of the extracted keywords are relevant to the analyzed text. The keys in the dictionary are strings containing the extracted keywords, and the values are the likelihoods that these keywords are relevant to the analyzed text. Using batched output, plot relevance graphs using pyplot.
input file -> text transcript
"""
import plotly.plotly as py
import plotly.graph_objs as go

#version 2.3
import indicoio

indicoio.config.api_key = "6e20bd4ee1b0be47f25d0f227578fd14"

#identify the important words within a document
# single example (result discarded; demonstrates the call shape)
indicoio.keywords("Some call it the sunshine state", version=2)

# batch example: one keyword dict per input document
x = indicoio.keywords(
    ["Some call it the sunshine state", "Some call it the sunshine state"],
    version=2)
# Print each keyword string from every per-document result dict.
for i in x:
    for u in i:
        print(u)
def crawlTwits(term):
    """Search recent tweets for `term`, average their sentiment_hq
    scores, and collect keywords scoring >= 0.2.

    Returns a human-readable summary string.
    NOTE(review): credentials are hard-coded -- should live in config.
    """
    # get authentication
    api = TwitterAPI('1KxHa1hlPbRdsggvL5yfBgHPY',
                     'afQVw38uLX3Q1YdILflyG4FjWhjkMzXgSP9ypLee4LM4QIMOea',
                     '2786140432-npYkBKUXdUj3ulYj5f2N7LLN7dVJD6L6KdoyyLi',
                     'qwOdzFwyNfBKcmO6tq2TbOElrDHfd0ooiXNhMy4a7kUMd')
    indicoio.config.api_key = 'e2637d8d80afb64412b3dda3dda64bdd'
    # keep a counter to sum the sentiment score
    scoreSum = 0
    # keep a counter to sum the number of twits
    twitsNum = 0
    # keep a list of keywords
    listKeyWords = ""
    # search twits
    r = api.request('search/tweets', {'q': term})
    for item in r:
        # filter out patterns: pull the tweet text out of the raw repr
        patterns = re.compile(', u\'text\': u\'(.*?)\', u\'is_quote_status\':')
        # NOTE(review): re.compile never returns None, so this fallback
        # branch is dead code as written.
        if patterns is None:
            patterns = re.compile(', u\'text\': u\"(.*?), u\'is_quote_status\':')
        # search for patterns from twits
        text = patterns.search(str(item))
        # if found
        if text:
            # group into a text
            twit = text.group(1)
            # send twit to indico to get sentiment analyzed
            sentimentNum = indicoio.sentiment_hq(twit)
            # sent twit to indico to get keywords
            json_keyWords = indicoio.keywords(twit)
            # go through dict object
            for key, value in json_keyWords.items():
                # if the key is relevant enough
                if value >= 0.2:
                    # add keywords to the list
                    listKeyWords += key + ", "
            # add up score sum
            scoreSum += sentimentNum
            # increment number of twits
            twitsNum += 1
            # Uncomment lines below to debug
            # print(twit)
            # print(sentimentNum)
            # if sentimentNum < 0.3:
            #     print("Negative")
            # elif sentimentNum > 0.7:
            #     print("Positive")
            # else:
            #     print("Neutral")
            # print('\n')
    # compute the average sentiment score
    average = scoreSum / twitsNum
    # get the evaluation (bucketed verdict from the average score)
    if average <= 0.2:
        rate = "very negative"
    elif average <= 0.4:
        rate = "slightly negative"
    elif average >= 0.8:
        rate = "very positive"
    elif average >= 0.6:
        rate = "slightly positive"
    else:
        rate = "neutral"
    # string to return
    string = "an average score of " + str(average) + "\nOverall, it is " + str(rate) + "\nKeywords are " + listKeyWords
    return string
import indicoio
import operator

# use your unique api access code
indicoio.config.api_key = 'bbae77d5bf0a8ced94bb348563710d44'

# sentence definition
sentence = "I have put blood on her two clean yellow shoes."

# dictionary of keywords and importances
keywords = indicoio.keywords(sentence)
print(keywords)

# the key with the highest importance
top_key = max(keywords, key=keywords.get)
print(top_key)

# Tokenize the sentence for further processing below this chunk.
words = sentence.split()
def keywords(s):
    """Top five keywords of `s`, most relevant first."""
    scores = indicoio.keywords(s)
    ranked = sorted(scores, key=scores.get, reverse=True)
    return ranked[:5]
def identify_keywords(text):
    """Merge indico text tags and keywords for `text` into one dict.

    Keyword scores overwrite tag scores on key collisions.
    """
    tags = indicoio.text_tags(text, threshold=0.01, top_n=NUM_RESULTS)
    words = indicoio.keywords(text, threshold=0.01, top_n=NUM_RESULTS)
    tags.update(words)
    return tags
def fetch_keywords(self):
    """Keep the five strongest v2 keywords of self.data as a set."""
    scored = indicoio.keywords(self.data, top_n=5, version=2)
    top_five = sorted(scored, key=scored.get, reverse=True)[:5]
    self.final_json['keywords'] = set(top_five)
    print("\nwords: ", self.final_json['keywords'])
def keywords(text):
    """Thin pass-through wrapper around indicoio.keywords."""
    return indicoio.keywords(text)
def get_keywords(self, desc):
    """Keyword weights for `desc` as a two-column DataFrame.

    Returns one row per keyword with columns ['word', 'weight'].
    """
    scored = indicoio.keywords(desc, version=4)
    frame = pd.DataFrame(scored, index=['weight']).T
    return frame.reset_index().rename({'index': 'word'}, axis='columns')