Example #1
    def test_keywords_language(self):
        text = "La semaine suivante, il remporte sa premiere victoire, dans la descente de Val Gardena en Italie, près de cinq ans après la dernière victoire en Coupe du monde d'un Français dans cette discipline, avec le succès de Nicolas Burtin à Kvitfjell"
        words = set(text.lower().split())

        results = keywords(text, language='French')
        sorted_results = sorted(results.keys(), key=lambda x: results.get(x), reverse=True)

        self.assertTrue(set(results.keys()).issubset(words))

        results = keywords(text, top_n=3)
        assert len(results) == 3

        results = keywords(text, threshold=.1)
        for v in results.values():
            assert v >= .1
Example #2
    def test_keywords(self):
        text = "A working api is key to the success of our young company"
        words = set(text.lower().split())

        results = keywords(text)
        sorted_results = sorted(results.keys(), key=lambda x: results.get(x), reverse=True)
        assert 'api' in sorted_results[:3]
        self.assertTrue(set(results.keys()).issubset(words))

        results = keywords(text, top_n=3)
        assert len(results) == 3

        results = keywords(text, threshold=.1)
        for v in results.values():
            assert v >= .1
Example #3
    def test_keywords_language_detect(self):
        text = "il a remporté sa première victoire dans la descente de Val Gardena en Italie"
        words = set(text.lower().split())

        results = keywords(text, language='detect')
        sorted_results = sorted(results.keys(), key=lambda x: results.get(x), reverse=True)
        result_keys = results.keys() if PY3 else map(lambda x: x.encode("utf-8"), results.keys())
        self.assertTrue(set(result_keys).issubset(words))

        results = keywords(text, top_n=3)
        assert len(results) == 3

        results = keywords(text, threshold=.1)
        for v in results.values():
            assert v >= .1
Example #4
def identify_keywords(text):
  text = text.encode("ascii", "ignore")
  print len(text)
  x = indicoio.text_tags(text, threshold=0.01, top_n=NUM_RESULTS)
  y = indicoio.keywords(text, threshold=0.01, top_n=NUM_RESULTS)
  x.update(y)
  return x
Example #5
def getSentiment(jsonInput, word):
	jsonStr = ""
	try:
		sentiments = indicoio.sentiment(jsonInput[0][0])
		keywords = indicoio.keywords(jsonInput[0][0])
		average = 0
		above_average = 0
		below_average = 0
		for sentiment in sentiments:
			average += sentiment
			if sentiment > 0.5:
				above_average += 1
			else:
				below_average += 1
		average = average / len(sentiments)
		above_average = float(above_average) / len(sentiments)
		below_average = float(below_average) / len(sentiments)
		most_frequent_words = getFrequentWords(jsonInput)
		jsonStr = "{\"results\":{\"above_average\":\""+str(above_average)+"\", \"word\":\""+word+"\",\"below_average\" :\""+str(below_average)+"\",\"average\":"+str(average)+"}, \"keywords\": \""+str(keywords)+"\", \"most_frequent_word\":\""
		for i in most_frequent_words[1:len(most_frequent_words)]:
			print(i.getKey())
			jsonStr+=i.getKey()+","
		jsonStr+= "\"}"

		result = BuzzfeedSearch(json=jsonStr, name=word)
		result.save()
		serializer = BuzzfeedSerializer(result)
		content = JSONRenderer().render(serializer.data) 
		all_entries = BuzzfeedSearch.objects.all()

	except Exception as e:
		return jsonStr
Example #6
File: nist.py Project: sihrc/irksome
    def search(self, query):
        numbers = filter_numbers(query.lower())
        keywords = [numbers] + [
            SYMBOL_MAPPING.get(key, key)
                for key in indicoio.keywords(" ".join(map(preformat, query.lower().split(" "))), top_n=100).keys()
        ]
        keywords = " ".join(keywords)
        best = 0
        results = []
        for key in KEYWORDS:
            # ratio = fuzz.token_set_ratio(keywords, key)
            ratio = 0
            for word in keywords.split(" "):
                keys = key.split(" ")
                if word in keys:
                    ratio += 1

            if ratio > best:
                best = ratio
                results = []
                results.append((ratio, key, data.ELEMENT_DATA[key]))
            elif ratio == best:
                results.append((ratio, key, data.ELEMENT_DATA[key]))

        if len(results) > 3:
            for i in xrange(len(results)):
                results[i] = fuzz.ratio(keywords, results[i][1]), results[i][1], results[i][2]

        self.respond([{result[1]: result[2]} for result in sorted(results, reverse=True)[:5]])
Example #7
def analysis(data):
    sentiment = ind.sentiment_hq(data)
    tags = sort(ind.text_tags(data))
    languages = sort(ind.language(data))
    politics = sort(ind.political(data))
    keywords = sort(ind.keywords(data))
    names = sort(ind.named_entities(data))

    print "Sentiment", sentiment

    print "\n\n\nTags"
    for t in tags:
        print t[0], float(t[1]) * 100

    print "\n\n\nLanguages"
    for l in languages:
        print l[0], float(l[1]) * 100

    print "\n\n\nPolitical"
    for p in politics:
        print p[0], float(p[1]) * 100
    
    print "\n\nkeywords"
    for k in keywords:
        print k[0], float(k[1]) * 100
Example #8
 def test_batch_keywords_v2(self):
     test_data = [
         "A working api is key to the success of our young company"
     ]
     response = keywords(test_data, api_key=self.api_key, version=2)
     self.assertTrue(isinstance(response, list))
     self.assertTrue(
         all([key in test_data[0] for key in response[0].keys()]))
Example #9
 def test_batch_keywords(self):
     test_data = [
         "A working api is key to the success of our young company"
     ]
     words = [set(text.lower().split()) for text in test_data]
     response = keywords(test_data, api_key=self.api_key)
     self.assertTrue(isinstance(response, list))
     self.assertTrue(set(response[0].keys()).issubset(words[0]))
Example #10
    def getResult(strArray):
        sent = indicoio.sentiment(strArray)
        pers = indicoio.personality(strArray)
        poli = indicoio.political(strArray)
        keyw = indicoio.keywords(strArray)

        result = dict([("sentiment", sent), ("personality", pers), ("political", poli), ("keywords", keyw)])
        return result
Example #11
    def test_keywords(self):
        text = "A working api is key to the success of our young company"
        words = set(text.lower().split())

        results = keywords(text)
        sorted_results = sorted(results.keys(),
                                key=lambda x: results.get(x),
                                reverse=True)
        assert 'api' in sorted_results[:3]
        self.assertTrue(set(results.keys()).issubset(words))

        results = keywords(text, top_n=3)
        assert len(results) == 3

        results = keywords(text, threshold=.1)
        for v in results.values():
            assert v >= .1
Example #12
def get_keywords_results(tweet_text_array):
    parsed_keywords = []
    raw_results = indicoio.keywords(tweet_text_array)
    for item_result in raw_results:
        tweet_keywords = []
        for keyword, score in item_result.items():
            tweet_keywords.append(keyword)
        parsed_keywords.append(tweet_keywords)
    return parsed_keywords
Example #13
    def test_keywords_language_detect(self):
        text = "il a remporté sa première victoire dans la descente de Val Gardena en Italie"
        words = set(text.lower().split())

        results = keywords(text, language='detect')
        sorted_results = sorted(results.keys(),
                                key=lambda x: results.get(x),
                                reverse=True)
        result_keys = results.keys() if PY3 else map(
            lambda x: x.encode("utf-8"), results.keys())
        self.assertTrue(set(result_keys).issubset(words))

        results = keywords(text, top_n=3)
        assert len(results) == 3

        results = keywords(text, threshold=.1)
        for v in results.values():
            assert v >= .1
Example #14
def quiz_sentence(sentence):
	keywords = indicoio.keywords(sentence)
	top_key = max(keywords, key=keywords.get)
	words = sentence.split()
	for i in range(len(words)):
		if words[i].lower() == top_key.lower():
			words[i] = "________"
	output = " ".join(words)
	return output
Example #15
def get_keywords_results(tweet_text_array):
    parsed_keywords = []
    raw_results = indicoio.keywords(tweet_text_array)
    for item_result in raw_results:
        tweet_keywords = []
        for keyword, score in item_result.items():
            tweet_keywords.append(keyword)
        parsed_keywords.append(tweet_keywords)
    return parsed_keywords
Example #16
 def score(self, slide_length, window_length, AItype='tags'):
     self.parse(slide_length, window_length)
     if AItype == 'tags':
         self.scores['tags'] = [indicoio.text_tags(i) for i in self.strings]
     elif AItype == 'keywords':
         self.scores['keywords'] = [indicoio.keywords(i) for i in self.strings]
     elif AItype == 'names':
         self.scores['names'] = [indicoio.named_entities(i) for i in self.strings]
     else:
         raise Exception('Warning: {} not a valid category'.format(AItype))
Example #17
def parse(message, number):
	store(message, number)
	userProf = analyzeUser(number)

	if comparePrev(message, number):
		return "Message Sent"
	else:
		ent = entityMatch(message)
		if ent == "None":
			print "keywords"
			print indicoio.keywords(message, version=2)
			print "tags"
			print indicoio.text_tags(message, threshold=.03)
			print "relevance"
			print indicoio.relevance("Renowned soccer legend Pele will be visiting...", ["Germany", "relocation", "Food", "Safety", "Family", "Transportation", "clothing"])

		else:
			"Found Entity, directing there"
			ticketCreate(message, number, ent)
Example #18
def add_entry(text, indicoio_api_key):
    indicoio.config.api_key = indicoio_api_key
    try:
        entries = json.loads(open("journal.json", "r").read())
    except (IOError, json.JSONDecodeError):
        entries = []
    entries.append({
        # datetime objects are not JSON serializable, so store an ISO string
        'date': datetime.utcnow().isoformat(),
        'keywords': indicoio.keywords(text)
    })
    open("journal.json", "w").write(json.dumps(entries))
Example #19
def scraper(request, logger):
    request_data = request.args.get('url')
    logger.debug("received {0}".format(request_data))

    url = request.args.get('url')
    url = urllib.unquote(url)
    headers = {
        "User-Agent":
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36"
    }

    logger.debug("url {0}".format(url))

    r = requests.get(url, headers=headers)
    tree = lxml.html.fromstring(r.text)
    data = tree.xpath("//body")[0].text_content()

    data = ''.join(data.split('\n'))

    ml_sectors = "ML Machine Learning AI Computer Vision Bots NML Anomaly Detection artificial intelligence"

    sectors_list = {
        "iot": "IoT",
        "ai": ml_sectors,
        "machine learning": ml_sectors,
        "telecom": "Telecom Telekom Phone",
        "fintech": "Payments FinTech Wallet P2P Bitcoin",
        "transportation": "Drive Cars Autonomous Taxi Bus Train Rail",
        "cyber": "Cyber Security intelligence Anomaly Detection Ad Fraud Ransom Virus Defense Attack",
    }

    logger.debug(u"data: {0}".format(data))

    sectors = []
    keywords = []
    analysis = indicoio.keywords(data, version=2)

    for keyword in analysis:
        keywords.append(keyword)
        for sector in sectors_list:
            if keyword.lower() in sectors_list[sector].lower():
                if sector not in sectors:
                    sectors.append(sector)

    return {"sectors": sectors}
Example #20
File: parse.py Project: sihrc/irksome
def parse_physical_constants(data):
    quantities = []
    elements = []
    for element in data["constant"]:
        if not element:
            continue
        quantities.append(element.pop("Quantity "))
        elements.append(element)

    list_keywords = indicoio.keywords(quantities, top_n=10)
    for i, element in enumerate(elements):
        element['Keywords'] = list_keywords[i].keys()

    return elements
Example #21
 def generate_keywords(self):
     # get keywords
     keywords_dict = indicoio.keywords(self.tweet)
     all_else_dict = indicoio.analyze_text(
         self.tweet, apis=['places', 'people', 'organizations'])
     # store identified keywords
     keywords = []
     for key, value in keywords_dict.iteritems():
         if value > 0.2:
             keywords.append(key)
     for key, value in all_else_dict.iteritems():
         for list_item in value:
             for key_in, value_in in list_item.iteritems():
                 if key_in == 'text':
                     keywords.append(value_in)
     return keywords
Example #22
def indicoioSent(request):
	allentries = Post.objects.all()

	one_entry = Post.objects.get(pk=1)
	# one_entry.statement = "Someone please fix the traffic here in Austin"
	# one_entry.save()
	counter = 1

	# parseTags = ""

	# one_entry.tags = indicoio.keywords(one_entry.statement)
	# for key in one_entry.tags:
	# 	parseTags+= key+", "
	# one_entry.tags = parseTags
	
	# one_entry.save()

	for one_entry in allentries:
	# 	#i = Post.objects.get(pk=counter)

	# 	one_entry.value = indicoio.sentiment_hq(one_entry.statement)

	# 	#add POS / NEG to sentiment field rather than percent numbers
	# 	if (one_entry.value > .7):
	# 		one_entry.sentiment = "POS"
	# 	elif(one_entry.value < .3):
	# 		one_entry.sentiment = "NEG"
	# 	else:
	# 		one_entry.sentiment = "NEU"

		parseTags = ""

		one_entry.tags = indicoio.keywords(one_entry.statement)
		for key in one_entry.tags:
			parseTags+= key+", "
		one_entry.tags = parseTags

		one_entry.save()
	# 	counter += 1

	allentries = serialize('json', Post.objects.all())
	
	return HttpResponse(allentries)
Example #23
File: views.py Project: eirism/PU-66
def extract_keyword(text):
    """

    Ask INDICO for the keyword in text.

    Returns the most important word in text,
    or None if there is none.
    Calls .lower() on the word.

    """
    response = indicoio.keywords(text, version=2, top_n=1)
    words = list(response.keys())
    if len(words) == 1:
        word = words[0].lower()
    else:
        word = None
    return word
Example #24
 def test_batch_keywords(self):
     test_data = ["A working api is key to the success of our young company"]
     words = [set(text.lower().split()) for text in test_data]
     response = keywords(test_data, api_key=self.api_key)
     self.assertTrue(isinstance(response, list))
     self.assertTrue(set(response[0].keys()).issubset(words[0]))
Example #25
 def getKeywords(self, text):
     try:
         keywords = indicoio.keywords(text)
     except:
         return None
     return keywords
Example #26
 def keywords_extraction_relative(self, text):
     length = len(text.split())
     self.keywords_relative = keywords(text, top_n=length, threshold=0, relative=True)
Example #27
File: hack.py Project: schillerk/hack
from lxml import html
import requests
import indicoio

indicoio.config.api_key = '246290703649a7500961ea78369dbce8'

page = requests.get('https://www.soylent.com/')
content = page.content

print indicoio.keywords(content)
print content
Example #28
 def test_keywords_v2(self):
     test_data = "A working api is key to the success of our young company"
     response = keywords(test_data, version=2)
     self.assertTrue(isinstance(response, dict))
     self.assertTrue(all([key in test_data for key in response.keys()]))
Example #29
import indicoio
indicoio.config.api_key = '6f3382489ab2469dbf8ca6023e91eabb'

# single example
result = indicoio.keywords("I miss daddy so much", api_key=indicoio.config.api_key, version=2, top_n=2, relative=True)

resultString = ""
for key in result:
    resultString = resultString + key + ":" + str(result[key]) + ","

print(resultString)

# batch example
# indicoio.emotion([
#     "I did it. I got into Grad School. Not just any program, but a GREAT program. :-)",
#     "Like seriously my life is bleak, I have been unemployed for almost a year."
# ])

Example #30
    for j in range(bin_size):
        if str(news_data.iloc[count]['headline']) != "nan":
            content += str(news_data.iloc[count]['headline'] + "\n")

        if str(news_data.iloc[count]['abstract']) != "nan":
            content += str(news_data.iloc[count]['abstract'] + "\n")

        if str(news_data.iloc[count]['snippet']) != "nan":
            content += str(news_data.iloc[count]['snippet'] + "\n")
        if str(news_data.iloc[count]['lead_paragraph']) != "nan":
            content += str(news_data.iloc[count]['lead_paragraph'] + "\n")
        count += 1
    content = re.sub("[^a-zA-Z \n]+", " ", content)
    tb = TextBlob(content)
    keywords = indicoio.keywords(content, version=2, top_n=50)
    keyword_list = sorted(keywords.items(),
                          key=operator.itemgetter(1),
                          reverse=True)

    # clean the words
    new_keyword_list = []
    for keyword in keyword_list:
        clean_keyword = unicodedata.normalize('NFKD', keyword[0]).encode(
            'ascii', 'ignore')
        new_keyword_list.append(
            (clean_keyword, keyword[1], tb.words.count(keyword[0])))
    all_keyword_list.append(new_keyword_list)

    print "finsih adding bin number: ", i
Example #31
 def test_batch_keywords_v2(self):
     test_data = ["A working api is key to the success of our young company"]
     response = keywords(test_data, api_key=self.api_key, version=2)
     self.assertTrue(isinstance(response, list))
     self.assertTrue(all([key in test_data[0] for key in response[0].keys()]))
Example #32
 def test_keywords_v2(self):
     test_data = "A working api is key to the success of our young company"
     response = keywords(test_data, api_key=self.api_key, version=2)
     self.assertTrue(isinstance(response, dict))
     self.assertTrue(all([key in test_data for key in response.keys()]))
Example #33
def indicoKeywords(tweet):
    tag_dict = indicoio.keywords(tweet)
    return sorted(tag_dict.keys(), key=lambda x: tag_dict[x], reverse=True)[:1]
Example #34
def execute(USERNAME, target, refresh):

    r_data = io_helper.read_raw(USERNAME, target)

    og = sys.stdout
    fpath = io_helper.out_path(USERNAME, target)

    def analysis(raw='', limit=5, text='', percent=True):
        global meta_dict
        # print lines if input is a list of non-dicts
        # if input is list of dicts, merge dicts and resend to analysis
        if isinstance(raw, list):
            for item in raw:
                if not isinstance(item, dict):
                    print(item)
                else:
                    create_meta_dict(item)
            analysis(meta_dict, limit, text, percent)

        # if input is dict: print k, v pairs
        # optional args for return limit and description text
        if isinstance(raw, dict):
            print(text)
            ct = 0
            for v in sorted(raw, key=raw.get, reverse=True):
                ct += 1
                if ct > limit: break
                if isinstance(raw[v], float):
                    if percent: per = r'%'
                    else: per = ''
                    print("    " + v, str(round(raw[v] * 100, 2)) + per)
                else:
                    print(v, raw[v])
            print()

    def create_meta_dict(item):
        # merge list of dicts into master dict
        global meta_dict
        meta_dict[item['text']] = item['confidence']
        return meta_dict

    rClean = ''
    for i in range(len(r_data)):
        if r_data[i - 1] == '\\':
            rClean = rClean[:-1]
            if r_data[i] != "'":
                continue

        if r_data[i] == '*':
            rClean += ' '
        else:
            rClean += r_data[i]

    r_data = rClean
    del rClean
    indicoio.config.api_key = keycheck.get_key()

    # Big 5
    big5 = {
        'text': "Big 5 personality inventory matches: ",
        "payload": indicoio.personality(r_data)
    }

    # Meyers briggs
    mbtiLabels = indicoio.personas(r_data)
    mbti_dict = {
        'architect': 'intj',
        'logician': 'intp',
        'commander': 'entj',
        'debater': 'entp',
        'advocate': 'infj',
        'mediator': 'infp',
        'protagonist': 'enfj',
        'campaigner': 'enfp',
        'logistician': 'istj',
        'defender': 'isfj',
        'executive': 'estj',
        'consul': 'esfj',
        'virtuoso': 'istp',
        'adventurer': 'isfp',
        'entrepreneur': 'estp',
        'entertainer': 'esfp'
    }

    def replace_mbti():
        for k, v in mbtiLabels.items():
            k = k.replace(k, mbti_dict[k])
            yield k

    k = (list(replace_mbti()))
    v = map(lambda x: x, mbtiLabels.values())
    payload = (dict(zip(k, v)))

    mbti = {
        'text': "Most likely personalilty styles: ",
        "payload": payload,
        'ct': 5,
        'percent': True
    }

    # Political
    pol = {
        'text': "Political alignments: ",
        "payload": indicoio.political(r_data, version=1)
    }
    # Sentiment
    sen = {
        'text': "Sentiment: ",
        "payload": {
            'Percent positive': indicoio.sentiment(r_data)
        },
        'ct': 3
    }

    # Emotion
    emo = {
        'text': "Predominant emotions:",
        "payload": indicoio.emotion(r_data),
        'ct': 5
    }

    # Keywords
    kw = {'text': "Keywords: ", "payload": indicoio.keywords(r_data), 'ct': 5}
    # Text tags
    tt = {
        'text': "Text tags: ",
        "payload": indicoio.text_tags(r_data),
        'ct': 10
    }
    # Place
    pla = {
        'text': "Key locations: ",
        'payload': indicoio.places(r_data, version=2),
        'ct': 3,
        'percent': True
    }

    def Karma(USERNAME):
        import praw
        import collections
        kList = []
        user_agent = ("N2ITN")
        r = praw.Reddit(user_agent=user_agent)
        thing_limit = 100

        user = r.get_redditor(USERNAME)
        gen = user.get_submitted(limit=thing_limit)
        karma_by_subreddit = {}
        for thing in gen:
            subreddit = thing.subreddit.display_name
            karma_by_subreddit[subreddit] = (
                karma_by_subreddit.get(subreddit, 0) + thing.score)

        for w in sorted(karma_by_subreddit,
                        key=karma_by_subreddit.get,
                        reverse=True):
            kList.append(str(w) + ': ' + str(karma_by_subreddit[w]))
        kList.insert(0, 'Karma by Sub')

        print("\n\t".join(kList[:10]))

    def show(results):
        # Accepts bag of dicts, or single dict
        if not isinstance(results, dict):
            for X in results:
                show(X)
        else:
            if results == pla and pla['payload'] == []:
                print("Not enough information to infer place of origin")
                print()
            else:

                i = results
                analysis(raw=i.get('payload', ''),
                         limit=i.get('ct', 5),
                         text=i.get('text', ''),
                         percent=i.get('percent', True))

    with open(fpath, 'w') as outtie:
        sys.stdout = outtie
        print(target + USERNAME)
        print()
        show([kw, pla, big5, emo, sen, pol, mbti, tt])
        Karma(USERNAME)

        sys.stdout = og
    return
Example #35
def findKeywords(inputString, top_n=5):
    ''' returns the keywords of a sentence in a list'''
    keywordDict = indicoio.keywords(inputString,top_n=top_n)
    return list(keywordDict)
Example #36
def analyzer(job_desc):
    shordy = indicoio.keywords(job_desc, threshold=0.15)
    return list(shordy.keys())
Example #37
    keys = list(dic.keys())
    return keys[index]


def getArgMax(dic, rnd=0):
    argMaxIndex = np.argmax(list(dic.values()))

    value = getValueAtIndex(dic, argMaxIndex)
    key = getKeyAtIndex(dic, argMaxIndex)

    if rnd:
        value = round(value, 2)

    return (key, value)


indicoio.config.api_key = 'a840148f0c38c41d3f70b43914656798'

text = "We're supposed to get up to 24 inches in the storm"

# single example
emotions = indicoio.emotion(text)

keywords = indicoio.keywords(text, version=2, relative=True)

print("Text: ", text)

print("Emotion: ", getArgMax(emotions, rnd=2))
print("Keyword: ",
      tuple((key, round(value, 2)) for key, value in list(keywords.items())))
Example #38
fd = open("NYT.txt", "r")
string1 = fd.read()
#fd = open("BBC.txt", "r")
#string1 += fd.read()
string1 = string1.replace("\xe2\x80\x9c", "\"")
string1 = string1.replace("\xe2\x80\x9d", "\"")
string1 = string1.replace("\xe2\x80\x99", "\'")
keywordList = []
tagList = []
entityList = []

myList = string1.split("\n", size)

for x in range(0, size):
    keywordList.append(indicoio.keywords(myList[x], top_n=10,
                                         independent=True))
    tagList.append(indicoio.text_tags(myList[x], threshold=.05))
    entityList.append(indicoio.named_entities(myList[x]))
    #print indicoio.text_tags(myList[x], threshold=.1)
    #print indicoio.keywords(myList[x], top_n=6, independent=True)

## build 2-d array of weights
matrix = [[0 for x in range(size)] for x in range(size)]

for x in range(0, size):
    for y in range(0, size):
        matrix[x][y] = 1000 * compareKeywords(
            keywordList[x], keywordList[y]) * compareTags(
                tagList[x], tagList[y]) * compareEntities(
                    entityList[x], entityList[y])
        #print str(x) + " " + str(y) + " " + str(matrix[x][y])
Example #39
	for j in range(bin_size):
		if str(news_data.iloc[count]['headline']) != "nan":
			content += str(news_data.iloc[count]['headline'] + "\n")

		if str(news_data.iloc[count]['abstract']) != "nan":
			content += str(news_data.iloc[count]['abstract'] + "\n")
		
		if str(news_data.iloc[count]['snippet']) != "nan":
			content += str(news_data.iloc[count]['snippet'] + "\n")
		if str(news_data.iloc[count]['lead_paragraph']) != "nan":
			content += str(news_data.iloc[count]['lead_paragraph'] + "\n")
		count += 1
	content = re.sub("[^a-zA-Z \n]+", " ", content)
	tb = TextBlob(content)
	keywords = indicoio.keywords(content, version=2 , top_n = 50)
	keyword_list = sorted(keywords.items(), key=operator.itemgetter(1),reverse=True)

	# clean the words
	new_keyword_list = []
	for keyword in keyword_list:
		clean_keyword = unicodedata.normalize('NFKD', keyword[0]).encode('ascii','ignore')
		new_keyword_list.append((clean_keyword, keyword[1], tb.words.count(keyword[0])))
	all_keyword_list.append(new_keyword_list)

	print "finsih adding bin number: ", i 

	df = pd.DataFrame(all_keyword_list)
	# save the result into the file in every loop
	with open('test3.csv', 'a') as f:
		df.to_csv(f, header=False)
Example #40
File: keywords.py Project: nshdesai/Amax
def get_keywords(data):
    """Returns all keywords in an article along with a confidence score """
    set_api_key()
    if data:
        return indicoio.keywords(data, threshold=0.15)
Example #41
#with open('textfile.txt', 'r') as myfile:
#   data = myfile.read().replace('\n', '')
#print(data)
import os
import indicoio

# reads from the file which contains the audio to speech content
__location__ = os.path.realpath(
    os.path.join(os.getcwd(), os.path.dirname(__file__)))
file_contents = open(os.path.join(__location__, "textfile.txt"))
text = file_contents.read()

# next, feed it into the ML API
indicoio.config.api_key = 'd08fbca96c4341957f0a8a0b21d08b5d'
print("Political Allegiance: ")
print(indicoio.political(text))
print("\n")
print("Key Words: ")
print(indicoio.keywords(text, version=2))
print("\n")
print("Important Persons: ")
print(indicoio.people(text))
print("\n")
print("Significant Locations: ")
print(indicoio.places(text))
print("\n")
print("Relevant Organizations: ")
print(indicoio.organizations(text))
Example #42
def get_keywords():
    if request.method == 'POST':
        data = dict(request.form)['data_to_analyze']
        return json.dumps({
            'keywords': list(indicoio.keywords(data, top_n=15, threshold=0.3, relative=True)[0].keys())
        })
Example #43
 def keywords_extraction(self, text):
     length = len(text.split())
     self.keywords = keywords(text, top_n=length, threshold=0)
Example #44
 def fetch_keywords(self):
     words = indicoio.keywords(self.data, top_n=5, version=2)
     self.final_json['keywords'] = set(sorted(words, key=words.get, reverse=True)[:5])
     print("\nwords: ", self.final_json['keywords'])
Example #45
# -*- coding: utf-8 -*-
# @Author: Abhi
# @Date:   2018-05-22 18:07:24
# @Last Modified by:   Abhi
# @Last Modified time: 2018-05-22 18:26:55

import indicoio as io
from collections import Counter
io.config.api_key = "9bc524ad52580fbbc308b2b136777ef9"

file = open("unique-crimes.txt", "r")
i = 0
rates = []
for crime in file.readlines():
    i += 1
    keywords = Counter(io.keywords(crime, version=2))
    top_words = keywords.most_common(2)
    keyword = " ".join(x[0] for x in top_words)
    rates += [(crime.strip(), io.sentiment_hq(keyword))]
    # print(keywords)
    # print(keyword)
    # print(io.sentiment_hq(keyword))
    # print()
    if i == 30:
        break
for rate in sorted(rates, key=lambda rate: rate[1]):
    print(rate)
Example #46
File: sentiment.py Project: LeeTZ/YAES
    review = json.loads(text)  # avoid shadowing the builtin `dict`
    asin = review[maxj[i]]["asin"]
    reviewText = review[maxj[i]]["reviewText"]
    overall = review[maxj[i]]["overall"]
    reviewTime = review[maxj[i]]["reviewTime"]
    reviewadd = [asin, reviewText, overall, reviewTime, maxsenti[i]]
    negativereviewinfile.append(reviewadd)
json.dump(negativereviewinfile, open('negativereview.json', 'wb'))

#keyvalue = [averagerate, averagesentiment, num, positivenum, middlenum, negativenum]
keyvalue = []
keyvalue.append(averagerate)
keyvalue.append(averagesentiment)
keyvalue.append(num)
keyvalue.append(positivenum)
keyvalue.append(middlenum)
keyvalue.append(negativenum)

json.dump(keyvalue, open('keyvalue.json', 'wb'))

#print maxi
#print maxj
#print maxsenti
#print positivenum
#print negativenum
#print num


keyw = keywords(keyreview)
json.dump(keyw, open('keyw.json', 'wb'))
Example #47
These key-value pairs represent the likelihood that each of the
extracted keywords is relevant to the analyzed text: the keys are
strings containing the extracted keywords, and the values are those
relevance likelihoods.

Using the batched output, plot relevance graphs using pyplot.
Input file -> text transcript

"""

import plotly.plotly as py
import plotly.graph_objs as go
#version 2.3

import indicoio
indicoio.config.api_key = "6e20bd4ee1b0be47f25d0f227578fd14"

#identify the important words within a document

# single example
indicoio.keywords("Some call it the sunshine state", version=2)

# batch example
x = indicoio.keywords(
    ["Some call it the sunshine state", "Some call it the sunshine state"],
    version=2)

for i in x:
    for u in i:
        print(u)
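# -- Added sketch (not part of the original example): the docstring above
# -- asks for relevance graphs via pyplot, so this assumes matplotlib is
# -- installed and reuses the batched result `x` from the call above.
import matplotlib.pyplot as plt

for idx, result in enumerate(x):
    # each result is a {keyword: relevance} dict for one input text
    words = sorted(result, key=result.get, reverse=True)
    scores = [result[w] for w in words]
    plt.figure()
    plt.bar(range(len(words)), scores)
    plt.xticks(range(len(words)), words, rotation=45, ha="right")
    plt.ylabel("relevance likelihood")
    plt.title("Keyword relevance for input #%d" % idx)
    plt.tight_layout()
plt.show()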
Example #48
def crawlTwits(term):
    # get authentication
    api = TwitterAPI('1KxHa1hlPbRdsggvL5yfBgHPY', 'afQVw38uLX3Q1YdILflyG4FjWhjkMzXgSP9ypLee4LM4QIMOea',
                     '2786140432-npYkBKUXdUj3ulYj5f2N7LLN7dVJD6L6KdoyyLi',
                     'qwOdzFwyNfBKcmO6tq2TbOElrDHfd0ooiXNhMy4a7kUMd')
    indicoio.config.api_key = 'e2637d8d80afb64412b3dda3dda64bdd'

    # keep a counter to sum the sentiment score
    scoreSum = 0
    # keep a counter to sum the number of twits
    twitsNum = 0
    # keep a list of keywords
    listKeyWords = ""

    # search twits
    r = api.request('search/tweets', {'q': term})
    for item in r:
        # filter out patterns
        patterns = re.compile(', u\'text\': u\'(.*?)\', u\'is_quote_status\':')
        # search for patterns from twits
        text = patterns.search(str(item))
        # re.compile never returns None, so fall back to the alternate
        # quoting pattern when the first search finds no match
        if text is None:
            text = re.compile(', u\'text\': u\"(.*?), u\'is_quote_status\':').search(str(item))
        # if found
        if text:
            # group into a text
            twit = text.group(1)

            # send twit to indico to get sentiment analyzed
            sentimentNum = indicoio.sentiment_hq(twit)
            # sent twit to indico to get keywords
            json_keyWords = indicoio.keywords(twit)
            # go through dict object
            for key, value in json_keyWords.items():
                # if the key is relevant enough
                if value >= 0.2:
                    # add keywords to the list
                    listKeyWords += key + ", "

            # add up score sum
            scoreSum += sentimentNum
            # increment number of twits
            twitsNum += 1

            # Uncomment lines below to debug
            # print(twit)
            # print(sentimentNum)
            # if sentimentNum < 0.3:
            #     print("Negative")
            # elif sentimentNum > 0.7:
            #     print("Positive")
            # else:
            #     print("Neutral")
            # print('\n')

    # compute the average sentiment score
    average = scoreSum / twitsNum
    # get the evaluation
    if average <= 0.2:
        rate = "very negative"
    elif average <= 0.4:
        rate = "slightly negative"
    elif average >= 0.8:
        rate = "very positive"
    elif average >= 0.6:
        rate = "slightly positive"
    else:
        rate = "neutral"
    # string to return
    string = "an average score of " + str(average) + "\nOverall, it is " + str(rate) + "\nKeywords are " + listKeyWords
    return string
Example #49
import indicoio
import operator

# use your unique api access code
indicoio.config.api_key = 'bbae77d5bf0a8ced94bb348563710d44'

# sentence definition
sentence = "I have put blood on her two clean yellow shoes."

# dictionary of keywords and importances
keywords = indicoio.keywords(sentence)
print(keywords)

# the key with the highest importance
top_key = max(keywords, key=keywords.get)
print(top_key)

words = sentence.split()
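# -- Added sketch (not part of the original snippet): a possible continuation,
# -- mirroring Example #14 above, that blanks out each occurrence of the
# -- top-scoring keyword to turn the sentence into a fill-in-the-blank quiz.
for i in range(len(words)):
    if words[i].lower() == top_key.lower():
        words[i] = "________"
print(" ".join(words))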
Example #50
def keywords(s):
    keyword_dict = indicoio.keywords(s)
    return sorted(keyword_dict.keys(), key=lambda x: keyword_dict[x], reverse=True)[:5]
Example #51
def identify_keywords(text):
  x = indicoio.text_tags(text, threshold=0.01, top_n=NUM_RESULTS)
  y = indicoio.keywords(text, threshold=0.01, top_n=NUM_RESULTS)
  x.update(y)
  return x
Example #52
 def fetch_keywords(self):
     words = indicoio.keywords(self.data, top_n=5, version=2)
     self.final_json['keywords'] = set(
         sorted(words, key=words.get, reverse=True)[:5])
     print("\nwords: ", self.final_json['keywords'])
Example #53
def keywords(text):
    return indicoio.keywords(text)
Example #54
 def get_keywords(self, desc):
     kw = indicoio.keywords(desc, version=4)
     kwords = pd.DataFrame(kw, index=['weight']).T.reset_index().rename(
         {'index': 'word'}, axis='columns')
     return kwords