コード例 #1
0
    def test_places_v1(self):
        """The v1 places endpoint spots 'Virginia' in a single string and a batch."""
        sample = "Lets all go to Virginia beach before it gets too cold to wander outside."
        hits = places(sample, version=1)
        self.assertTrue(isinstance(hits, list))
        best_first = sorted(hits, key=lambda hit: hit['confidence'], reverse=True)
        self.assertTrue('Virginia' in best_first[0]['text'])

        batch = [sample] * 2
        hits = places(batch, version=1)
        self.assertTrue(isinstance(hits, list))
        ranked = [sorted(doc, key=lambda hit: hit['confidence'], reverse=True) for doc in hits]
        self.assertTrue('Virginia' in ranked[0][0]['text'])
コード例 #2
0
    def test_places(self):
        """The default places endpoint spots 'Virginia' in a string and a batch."""
        sentence = "Lets all go to Virginia Beach before it gets too cold to wander outside."
        hits = places(sentence)
        self.assertTrue(isinstance(hits, list))
        by_confidence = sorted(hits, key=lambda h: h['confidence'], reverse=True)
        self.assertTrue('Virginia' in by_confidence[0]['text'])

        sentences = [sentence] * 2
        hits = places(sentences)
        self.assertTrue(isinstance(hits, list))
        per_doc = [sorted(doc, key=lambda h: h['confidence'], reverse=True) for doc in hits]
        self.assertTrue('Virginia' in per_doc[0][0]['text'])
コード例 #3
0
    def test_places_v2(self):
        """The places endpoint spots 'Virginia' for both a string and a batch."""
        sample = (
            "Lets all go to Virginia beach before it gets too cold to wander outside."
        )
        hits = places(sample)
        self.assertTrue(isinstance(hits, list))
        top_hits = sorted(hits, key=lambda h: h["confidence"], reverse=True)
        self.assertTrue("Virginia" in top_hits[0]["text"])

        batch = [sample] * 2
        hits = places(batch)
        self.assertTrue(isinstance(hits, list))
        per_doc = [
            sorted(doc, key=lambda h: h["confidence"], reverse=True) for doc in hits
        ]
        self.assertTrue("Virginia" in per_doc[0][0]["text"])
コード例 #4
0
def find_plot_country(book_page_name):
    """Look up *book_page_name* on Wikipedia and print a likely plot location.

    Runs the indico places detector over the page summary first; if no
    places are found there, falls back to the page's "Plot" section.
    Prints the detected place name (second-ranked hit, as in the original
    heuristic) or nothing when too few places are found.  Returns None.
    """
    page_results = wikipedia.page(book_page_name)
    page_summary = page_results.summary
    places = indicoio.places(page_summary)

    if not places:
        # BUG FIX: call section("Plot") once and reuse it -- the original
        # fetched the section twice just to print it.
        page_plot = page_results.section("Plot")
        print(page_plot)
        plot_places = indicoio.places(page_plot)
        # Guard the index: the original crashed with IndexError when the
        # API returned fewer than two places.
        if len(plot_places) > 1:
            print(plot_places[1]['text'])
    else:
        # BUG FIX: reuse the `places` result computed above instead of
        # paying for a second identical indicoio.places(page_summary) call.
        if len(places) > 1:
            print(places[1]['text'])
コード例 #5
0
def bing_search(query, name):
    """Search Bing News for *query* and collect articles mentioning *name*.

    For each matching article, the location is taken from the best indico
    places hit on the title, falling back to the highest-confidence hit on
    the description.  Returns a list of dicts with keys: title, location,
    description, date_pub, url.
    """
    from urllib.parse import quote_plus

    # BUG FIX: the query must be URL-encoded (as the original commented-out
    # lines intended); raw spaces/punctuation produce a malformed URL.
    # count maxes out at 100 per request.
    url = 'https://api.cognitive.microsoft.com/bing/v5.0/news/search'
    url += '?q={}&count=100'.format(quote_plus(query))
    headers = {'Ocp-Apim-Subscription-Key': BING_KEY}
    r = requests.get(url, headers=headers)
    resp = json.loads(r.text)

    def _entry(article, location):
        # Shared record shape for a matched article (was duplicated inline).
        return {
            'title': article['name'],
            'location': location,
            'description': article['description'],
            'date_pub': article['datePublished'],
            'url': article['url'],
        }

    news = []
    # Response structure: dict -> "value": list of article dicts.
    for article in resp["value"]:
        if name not in article['name']:
            continue
        title_place = indicoio.places(article['name'])
        descrip_place = indicoio.places(article['description'])
        sorted_locations = sorted(
            descrip_place,
            key=lambda hit: hit['confidence'],
            reverse=True)
        # Prefer a place found in the title; otherwise fall back to the
        # most confident place found in the description.
        if title_place:
            news.append(_entry(article, title_place[0]['text']))
        elif sorted_locations:
            news.append(_entry(article, sorted_locations[0]['text']))
    return news
コード例 #6
0
def find_plot_country(book_page_name):
    """Return the most confident place name from a book's Wikipedia page.

    Tries the "Plot" section first and falls back to the page summary.
    Returns a short error string when the page cannot be resolved, or a
    not-found message when no location is detected.
    """
    try:
        page = wikipedia.page(book_page_name)
    except wikipedia.exceptions.DisambiguationError:
        return 'DisambiguationError'
    except wikipedia.exceptions.PageError:
        return 'PageError'

    detected = []
    plot_text = page.section('Plot')
    if plot_text is not None and plot_text != '':
        detected = indicoio.places(plot_text)
    if not detected:
        detected = indicoio.places(page.summary)

    # Rank candidates by confidence, highest first.
    candidates = [(hit['confidence'], hit['text']) for hit in detected]
    candidates.sort(reverse=True)

    if candidates:
        return candidates[0][1]
    return 'Plot location not found on Wikipedia'
コード例 #7
0
ファイル: keyword.py プロジェクト: kpsuperplane/mhacks9
def keywords(blob):
    """Extract notable people, places and tf-idf keywords from *blob*.

    Returns a de-duplicated list of strings.
    """
    things = []
    # SECURITY NOTE: hard-coded API key; move to configuration.
    indicoio.config.api_key = 'ab83001ca5c484aa92fc18a5b2d6585c'

    # Keep only high-confidence named people.
    people = indicoio.people(blob)
    for person in people:
        if person['confidence'] > 0.5:
            things.append(person['text'])

    # Keep only high-confidence place names.
    places = indicoio.places(blob)
    for place in places:
        if place['confidence'] > 0.5:
            things.append(place['text'])
    print(things)

    blob = parse_stop_words(blob)
    tfdic = tf(blob)
    # BUG FIX: extend (not append) so `things` stays a flat list of
    # strings; the original nested a list inside `things`, which made the
    # set() below raise TypeError (lists are unhashable).
    things.extend(tfidf(tfdic, idf(tfdic, blob)).keys())
    # BUG FIX: the original referenced an undefined name `source_list`
    # (NameError at runtime); de-duplicate the collected strings instead.
    things = list(set(things))
    return things
コード例 #8
0
# print(michelle_news[0][3]['description'])
# positive = [tweet for tweet in tweet_data if tweet['sentiment']['pos'] > 0]


michelle_places = []
# michelle_news holds the article lists for the four main search phrases.
for phrase_results in michelle_news:
    for article in phrase_results:
        # Keep only articles whose title mentions "Michelle Obama" and for
        # which the indico places API finds at least one location in the title.
        if 'Michelle Obama' not in article['title']:
            continue
        place = indicoio.places(article['title'])
        if len(place) > 0:
            michelle_places.append({
                'title': article['title'],
                'location': place[0]['text'],
                'description': article['description'],
                'date_pub': article['date_pub'],
                'url': article['url'],
            })

print(len(michelle_places))

print(michelle_places)
# print(relevant[1]['description'])

# sort list to show greatest location with highest confidence to location with lowest
コード例 #9
0
def execute(USERNAME, target, refresh):
    """Run the indico text-analysis suite over a user's raw data.

    Loads the raw text for (USERNAME, target) via io_helper, scrubs escape
    characters from it, queries the indico API for personality, persona,
    political, sentiment, emotion, keyword, text-tag and place predictions,
    then writes the formatted report plus Reddit karma stats to the user's
    output file.  `refresh` is accepted but never read here.  Returns None.
    """

    r_data = io_helper.read_raw(USERNAME, target)

    # Remember the real stdout so it can be restored after the report;
    # everything printed below gets redirected into the output file.
    og = sys.stdout
    fpath = io_helper.out_path(USERNAME, target)

    def analysis(raw='', limit=5, text='', percent=True):
        """Pretty-print one analysis payload.

        List payloads: non-dict items are printed directly; dict items are
        merged into the module-level `meta_dict`, which is then re-analysed.
        Dict payloads: print the top-`limit` keys sorted by value, floats
        rendered as percentages when `percent` is true.
        """
        # NOTE(review): `meta_dict` is never initialised in this function;
        # it must already exist at module scope or the list branch raises
        # NameError -- confirm against the enclosing module.
        global meta_dict
        # print lines if input is a list of non-dicts
        # if input is list of dicts, merge dicts and resend to analysis
        if isinstance(raw, list):
            for item in raw:
                if not isinstance(item, dict):
                    print(item)
                else:
                    create_meta_dict(item)
            analysis(meta_dict, limit, text, percent)

        # if input is dict: print k, v pairs
        # optional args for return limit and description text
        if isinstance(raw, dict):
            print(text)
            ct = 0
            for v in sorted(raw, key=raw.get, reverse=True):
                ct += 1
                if ct > limit: break
                if isinstance(raw[v], float):
                    if percent: per = r'%'
                    else: per = ''
                    # Scale 0..1 floats to a two-decimal percentage.
                    print("    " + v, str(round(raw[v] * 100, 2)) + per)
                else:
                    print(v, raw[v])
            print()

    def create_meta_dict(item):
        # merge list of dicts into master dict
        # Maps each item's 'text' to its 'confidence' in the module-level
        # meta_dict accumulator.
        global meta_dict
        meta_dict[item['text']] = item['confidence']
        return meta_dict

    # Scrub the raw text: drop backslash escape sequences and replace '*'
    # with spaces before sending it to the API.
    rClean = ''
    for i in range(len(r_data)):
        # NOTE(review): at i == 0 this reads r_data[-1] (the LAST char) via
        # Python negative indexing -- confirm that edge case is intended.
        if r_data[i - 1] == '\\':
            rClean = rClean[:-1]
            if r_data[i] != "'":
                continue

        if r_data[i] == '*':
            rClean += ' '
        else:
            rClean += r_data[i]

    r_data = rClean
    del rClean
    indicoio.config.api_key = keycheck.get_key()

    # Big 5
    big5 = {
        'text': "Big 5 personality inventory matches: ",
        "payload": indicoio.personality(r_data)
    }

    # Meyers briggs
    mbtiLabels = indicoio.personas(r_data)
    # Persona label -> four-letter Myers-Briggs code.
    mbti_dict = {
        'architect': 'intj',
        'logician': 'intp',
        'commander': 'entj',
        'debater': 'entp',
        'advocate': 'infj',
        'mediator': 'infp',
        'protagonist': 'enfj',
        'campaigner': 'enfp',
        'logistician': 'istj',
        'defender': 'isfj',
        'executive': 'estj',
        'consul': 'esfj',
        'virtuoso': 'istp',
        'adventurer': 'isfp',
        'entrepreneur': 'estp',
        'entertainer': 'esfp'
    }

    def replace_mbti():
        # Yield each persona label translated to its Myers-Briggs code.
        for k, v in mbtiLabels.items():
            k = k.replace(k, mbti_dict[k])
            yield k

    # Rebuild the personas payload keyed by MBTI code instead of label.
    k = (list(replace_mbti()))
    v = map(lambda x: x, mbtiLabels.values())
    payload = (dict(zip(k, v)))

    mbti = {
        'text': "Most likely personalilty styles: ",
        "payload": payload,
        'ct': 5,
        'percent': True
    }

    # Political
    pol = {
        'text': "Political alignments: ",
        "payload": indicoio.political(r_data, version=1)
    }
    # Sentiment
    sen = {
        'text': "Sentiment: ",
        "payload": {
            'Percent positive': indicoio.sentiment(r_data)
        },
        'ct': 3
    }

    # Emotion
    emo = {
        'text': "Predominant emotions:",
        "payload": indicoio.emotion(r_data),
        'ct': 5
    }

    # Keywords
    kw = {'text': "Keywords: ", "payload": indicoio.keywords(r_data), 'ct': 5}
    # Text tags
    tt = {
        'text': "Text tags: ",
        "payload": indicoio.text_tags(r_data),
        'ct': 10
    }
    # Place
    pla = {
        'text': "Key locations: ",
        'payload': indicoio.places(r_data, version=2),
        'ct': 3,
        'percent': True
    }

    def Karma(USERNAME):
        """Print the user's top-10 Reddit karma totals grouped by subreddit."""
        import praw
        import collections
        kList = []
        user_agent = ("N2ITN")
        r = praw.Reddit(user_agent=user_agent)
        thing_limit = 100

        user = r.get_redditor(USERNAME)
        gen = user.get_submitted(limit=thing_limit)
        # Sum submission scores per subreddit.
        karma_by_subreddit = {}
        for thing in gen:
            subreddit = thing.subreddit.display_name
            karma_by_subreddit[subreddit] = (
                karma_by_subreddit.get(subreddit, 0) + thing.score)

        for w in sorted(karma_by_subreddit,
                        key=karma_by_subreddit.get,
                        reverse=True):
            kList.append(str(w) + ': ' + str(karma_by_subreddit[w]))
        kList.insert(0, 'Karma by Sub')

        print("\n\t".join(kList[:10]))

    def show(results):
        """Render one payload dict, or recurse over a list of them."""
        # Accepts bag of dicts, or single dict
        if not isinstance(results, dict):
            for X in results:
                show(X)
        else:
            # The places endpoint may return an empty list; report that
            # specially instead of printing an empty section.
            if results == pla and pla['payload'] == []:
                print("Not enough information to infer place of origin")
                print()
            else:

                i = results
                analysis(raw=i.get('payload', ''),
                         limit=i.get('ct', 5),
                         text=i.get('text', ''),
                         percent=i.get('percent', True))

    # Redirect stdout into the report file while printing every section.
    # NOTE(review): if anything above raises inside this block, stdout is
    # never restored (no try/finally) -- consider contextlib.redirect_stdout.
    with open(fpath, 'w') as outtie:
        sys.stdout = outtie
        print(target + USERNAME)
        print()
        show([kw, pla, big5, emo, sen, pol, mbti, tt])
        Karma(USERNAME)

        sys.stdout = og
    return
コード例 #10
0
import os
import indicoio

# Read the speech-to-text transcript that sits next to this script.
__location__ = os.path.realpath(
    os.path.join(os.getcwd(), os.path.dirname(__file__)))
# BUG FIX: use a context manager so the file handle is always closed;
# the original opened the file and never closed it.
with open(os.path.join(__location__, "textfile.txt")) as file_contents:
    text = file_contents.read()

# Feed the transcript into the indico ML API endpoints.
# SECURITY NOTE: hard-coded API key; move it to an environment variable
# before sharing this script.
indicoio.config.api_key = 'd08fbca96c4341957f0a8a0b21d08b5d'
print("Political Allegiance: ")
print(indicoio.political(text))
print("\n")
print("Key Words: ")
print(indicoio.keywords(text, version=2))
print("\n")
print("Important Persons: ")
print(indicoio.people(text))
print("\n")
print("Significant Locations: ")
print(indicoio.places(text))
print("\n")
print("Relevant Organizations: ")
print(indicoio.organizations(text))
コード例 #11
0
ファイル: insights.py プロジェクト: N2ITN/Reddit_Persona
def execute(USERNAME, target, refresh):
    """Run the indico text-analysis suite over a user's raw data.

    Loads the raw text for (USERNAME, target) via io_helper, scrubs escape
    characters from it, queries the indico API for personality, persona,
    political, sentiment, emotion, keyword, text-tag and place predictions,
    then writes the formatted report to the user's output file.  `refresh`
    is accepted but never read here.  Returns None.
    """

    r_data = io_helper.read_raw(USERNAME, target)

    # Remember the real stdout so it can be restored after the report;
    # everything printed below gets redirected into the output file.
    og = sys.stdout
    fpath = io_helper.out_path(USERNAME, target)

    def analysis(raw='', limit=5, text='', percent=True):
        """Pretty-print one analysis payload.

        List payloads: non-dict items are printed directly; dict items are
        merged into the module-level `meta_dict`, which is then re-analysed.
        Dict payloads: print the top-`limit` keys sorted by value, floats
        rendered as percentages when `percent` is true.
        """
        # NOTE(review): `meta_dict` is never initialised in this function;
        # it must already exist at module scope or the list branch raises
        # NameError -- confirm against the enclosing module.
        global meta_dict
        # print lines if input is a list of non-dicts
        # if input is list of dicts, merge dicts and resend to analysis
        if isinstance(raw, list):
            for item in raw:
                if not isinstance(item, dict):
                    print(item)
                else:
                    create_meta_dict(item)
            analysis(meta_dict, limit, text, percent)

        # if input is dict: print k, v pairs
        # optional args for return limit and description text
        if isinstance(raw, dict):
            print(text)
            ct = 0
            for v in sorted(raw, key=raw.get, reverse=True):
                ct += 1
                if ct > limit: break
                if isinstance(raw[v], float):
                    if percent: per = r'%'
                    else: per = ''
                    # Scale 0..1 floats to a two-decimal percentage.
                    print("    " + v, str(round(raw[v] * 100, 2)) + per)
                else:
                    print(v, raw[v])
            print()

    def create_meta_dict(item):
        # merge list of dicts into master dict
        # Maps each item's 'text' to its 'confidence' in the module-level
        # meta_dict accumulator.
        global meta_dict
        meta_dict[item['text']] = item['confidence']
        return meta_dict

    # Scrub the raw text: drop backslash escape sequences and replace '*'
    # with spaces before sending it to the API.
    rClean = ''
    for i in range(len(r_data)):
        # NOTE(review): at i == 0 this reads r_data[-1] (the LAST char) via
        # Python negative indexing -- confirm that edge case is intended.
        if r_data[i - 1] == '\\':
            rClean = rClean[:-1]
            if r_data[i] != "'":
                continue

        if r_data[i] == '*':
            rClean += ' '
        else:
            rClean += r_data[i]

    r_data = rClean
    del rClean
    indicoio.config.api_key = keycheck.get_key()

    # Big 5
    big5 = {'text': "Big 5 personality inventory matches: ", "payload": indicoio.personality(r_data)}

    # Meyers briggs
    mbtiLabels = indicoio.personas(r_data)
    # Persona label -> four-letter Myers-Briggs code.
    mbti_dict = {
        'architect': 'intj',
        'logician': 'intp',
        'commander': 'entj',
        'debater': 'entp',
        'advocate': 'infj',
        'mediator': 'infp',
        'protagonist': 'enfj',
        'campaigner': 'enfp',
        'logistician': 'istj',
        'defender': 'isfj',
        'executive': 'estj',
        'consul': 'esfj',
        'virtuoso': 'istp',
        'adventurer': 'isfp',
        'entrepreneur': 'estp',
        'entertainer': 'esfp'
    }

    def replace_mbti():
        # Yield each persona label translated to its Myers-Briggs code.
        for k, v in mbtiLabels.items():
            k = k.replace(k, mbti_dict[k])
            yield k

    # Rebuild the personas payload keyed by MBTI code instead of label.
    k = (list(replace_mbti()))
    v = map(lambda x: x, mbtiLabels.values())
    payload = (dict(zip(k, v)))

    mbti = {'text': "Most likely personalilty styles: ", "payload": payload, 'ct': 5, 'percent': True}

    # Political
    pol = {'text': "Political alignments: ", "payload": indicoio.political(r_data, version=1)}
    # Sentiment
    sen = {'text': "Sentiment: ", "payload": {'Percent positive': indicoio.sentiment(r_data)}, 'ct': 3}

    # Emotion 
    emo = {'text': "Predominant emotions:", "payload": indicoio.emotion(r_data), 'ct': 5}

    # Keywords
    kw = {'text': "Keywords: ", "payload": indicoio.keywords(r_data), 'ct': 5}
    # Text tags
    tt = {'text': "Text tags: ", "payload": indicoio.text_tags(r_data), 'ct': 10}
    # Place
    pla = {
        'text': "Key locations: ",
        'payload': indicoio.places(r_data, version=2),
        'ct': 3,
        'percent': True
    }

    def Karma(USERNAME):
        """Print the user's top-10 Reddit karma totals grouped by subreddit."""
        import praw
        import collections
        kList = []
        user_agent = ("N2ITN")
        r = praw.Reddit(user_agent=user_agent)
        thing_limit = 100

        user = r.get_redditor(USERNAME)
        gen = user.get_submitted(limit=thing_limit)
        # Sum submission scores per subreddit.
        karma_by_subreddit = {}
        for thing in gen:
            subreddit = thing.subreddit.display_name
            karma_by_subreddit[subreddit] = (karma_by_subreddit.get(subreddit, 0) + thing.score)

        for w in sorted(karma_by_subreddit, key=karma_by_subreddit.get, reverse=True):
            kList.append(str(w) + ': ' + str(karma_by_subreddit[w]))
        kList.insert(0, 'Karma by Sub')

        print("\n\t".join(kList[:10]))

    def show(results):
        """Render one payload dict, or recurse over a list of them."""
        # Accepts bag of dicts, or single dict
        if not isinstance(results, dict):
            for X in results:
                show(X)
        else:
            # The places endpoint may return an empty list; report that
            # specially instead of printing an empty section.
            if results == pla and pla['payload'] == []:
                print("Not enough information to infer place of origin")
                print()
            else:

                i = results
                analysis(
                    raw=i.get('payload', ''),
                    limit=i.get('ct', 5),
                    text=i.get('text', ''),
                    percent=i.get('percent', True)
                )

    # Redirect stdout into the report file while printing every section.
    # NOTE(review): if anything above raises inside this block, stdout is
    # never restored (no try/finally) -- consider contextlib.redirect_stdout.
    with open(fpath, 'w') as outtie:
        sys.stdout = outtie
        print(target + USERNAME)
        print()
        show([kw, pla, big5, emo, sen, pol, mbti, tt])
        # Karma(USERNAME)

        sys.stdout = og
    return