Example #1
def analyze_personality(twitter_name):
    '''
    twitter_name :: str
    Determine the personality of a user based on a CSV file containing their most recent tweets.
    '''
    global personality_dict
    data = []

    csv_name = twitter_name + '_tweets.csv'
    if not os.path.isfile(csv_name):
        get_all_tweets(twitter_name)

    with open(csv_name) as file:
        reader = csv.reader(file)
        for row in reader:
            if len(row) > 0:
                # checks if the content of the tweet is a link to an article/image
                text = row[2][2:-1]
                if "https" in text:
                    continue
                data.append(text)

    coae = ["conscientiousness", "openness", "agreeableness", "extraversion"]

    for i in data:
        mini_dic = indicoio.personality(i)
        result = max(mini_dic.keys(), key=(lambda k: mini_dic[k]))
        if result in coae:
            increment_personality(result)

    personality = max(personality_dict.keys(),
                      key=(lambda k: personality_dict[k]))
    clear(coae)
    return personality
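Example #1 leans on a module-level personality_dict plus increment_personality and clear helpers (and a get_all_tweets scraper) that live elsewhere in that script. A minimal sketch of the counting helpers, with names taken from the example and bodies assumed, might look like this:

import indicoio

indicoio.config.api_key = "YOUR_INDICO_API_KEY"  # assumed: the original script configures the key elsewhere

# Running tally of how often each trait scores highest for a tweet
personality_dict = {
    "conscientiousness": 0,
    "openness": 0,
    "agreeableness": 0,
    "extraversion": 0,
}


def increment_personality(trait):
    """Bump the counter for the trait that won a single tweet."""
    personality_dict[trait] += 1


def clear(coae):
    """Reset the counters so the next user starts from zero."""
    for trait in coae:
        personality_dict[trait] = 0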
Example #2
def main():
    if len(sys.argv) != 3:
        return

    inname = sys.argv[1]
    outname = sys.argv[2]

    with open(inname, mode='r') as inFile:
        tweets = json.load(inFile)
        count = 0

        for tweet in tweets:
            result = indicoio.personality(tweet['text'])
            tweet['extraversion'] = result['extraversion']
            tweet['openness'] = result['openness']
            tweet['agreeableness'] = result['agreeableness']
            tweet['conscientiousness'] = result['conscientiousness']

            count += 1

            if count % 100 == 0:
                print(count)
                with open(outname, 'w') as outfile:
                    json.dump(tweets, outfile)

        with open(outname, 'w') as outfile:
            json.dump(tweets, outfile)
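Assuming this script is saved as analyze_tweets.py (name hypothetical), it would be run as python analyze_tweets.py tweets_in.json tweets_out.json; the dump inside the loop rewrites the output file every 100 tweets, so partial results survive if the API session is interrupted.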
    def getResult(strArray):
        sent = indicoio.sentiment(strArray)
        pers = indicoio.personality(strArray)
        poli = indicoio.political(strArray)
        keyw = indicoio.keywords(strArray)

        result = dict([("sentiment", sent), ("personality", pers), ("political", 4098), ("keywords", keyw)])
        return result
    def test_personalities(self):
        test_string = "I love my friends!"
        response = personality(test_string)

        categories = ['extraversion', 'openness', 'agreeableness', 'conscientiousness']
        self.assertTrue(isinstance(response, dict))
        self.assertIsInstance(response['extraversion'], float)
        for category in categories:
            assert category in response.keys()
 def test_batch_personality(self):
     test_string = "I love my friends!"
     response = personality([test_string,test_string])
     categories = ['extraversion', 'openness', 'agreeableness', 'conscientiousness']
     self.assertTrue(isinstance(response, list))
     self.assertIsInstance(response[0]["extraversion"], float)
     for category in categories:
         assert category in response[0].keys()
     self.assertEqual(response[0]["extraversion"], response[1]["extraversion"])
 def analyze_tweets_personality(self):
     try:
         self.personality_stats = Factor(
             indicoio.personality(
                 self.person.all_text_as_one().content).items(),
             'Personality stats')
         self.plotter.add_factor(self.personality_stats)
     except IndicoError:
         raise PersonAnalyzerException(
             'Error while fetching data from indicoio')
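The method above also assumes a project-specific PersonAnalyzerException and the client's IndicoError. A rough sketch of those surrounding pieces; the IndicoError import path and the exception body are assumptions rather than code from the original project:

from indicoio.utils.errors import IndicoError  # import path assumed for the legacy indicoio client


class PersonAnalyzerException(Exception):
    """Raised when a third-party analysis call fails."""
    pass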
Example #7
    def test_personalities(self):
        test_string = "I love my friends!"
        response = personality(test_string)

        categories = [
            'extraversion', 'openness', 'agreeableness', 'conscientiousness'
        ]
        self.assertTrue(isinstance(response, dict))
        self.assertIsInstance(response['extraversion'], float)
        for category in categories:
            assert category in response.keys()
Example #8
 def test_batch_personality(self):
     test_string = "I love my friends!"
     response = personality([test_string, test_string])
     categories = [
         'extraversion', 'openness', 'agreeableness', 'conscientiousness'
     ]
     self.assertTrue(isinstance(response, list))
     self.assertIsInstance(response[0]["extraversion"], float)
     for category in categories:
         assert category in response[0].keys()
     self.assertEqual(response[0]["extraversion"],
                      response[1]["extraversion"])
 def get_personality(self):
     personality_scores = [0, 0, 0, 0]
     personality_dict = indicoio.personality(self.tweet_text)
     for key, value in personality_dict.iteritems():
         if key == 'extraversion':
             personality_scores[0] += value
         elif key == 'openness':
             personality_scores[1] += value
         elif key == 'agreeableness':
             personality_scores[2] += value
         elif key == 'conscientiousness':
             personality_scores[3] += value
     return personality_scores
def analyze_text(text_tweets):
    personality_scores_list = []
    emotion_scores_list = []

    personality = indicoio.personality(text_tweets)
    emotion = indicoio.emotion(text_tweets)

    for x in personality:
        personality_scores_list.append(x)

    for y in emotion:
        emotion_scores_list.append(y)

    return personality_scores_list, emotion_scores_list
Example #11
def q1():
    user_input = input("My idea of a fun friday night is ___")
    print "Your input: " + str(user_input)
    emotion = indicoio.emotion(user_input)
    personality = indicoio.personality(user_input)
    personas = indicoio.personas(user_input)

    pprint(emotion)
    e_max = max(emotion, key=emotion.get)
    personas_max = max(personas, key=personas.get)
    personality_max = max(personality, key=personality.get)

    print "Congradulations, your emotion is " + str(
        e_max) + ", your personality is " + str(
            personality_max) + ", and your persona is " + str(personas_max)
Example #12
def spam_filter(msg=input("Enter message = ")):
    msg = TextBlob(msg)
    current_lang = msg.detect_language()
    print("Language of this message is = ", current_lang)
    if current_lang != 'en':
        msg = msg.translate(to='en')
    else:
        msg = msg.correct()
    X_dtm = vect.fit_transform(X)
    test_dtm = vect.transform([str(msg)])
    model.fit(X_dtm, y)
    result = model.predict(test_dtm)
    prob = model.predict_proba(test_dtm)
    if result == [1]:
        print("SPAM ALERT!")
    else:
        print("HAM")
        predsa = clf.predict(vectsa.transform([str(msg)]))

        if predsa == [1]:
            print("Positive Feeling")

        elif predsa == [0]:
            print("Negative Feeling")
        else:
            print("Can't analyze ur Felling...Try API ? ....")
        senti = indicoio.sentiment_hq(str(msg))
        print("Online Help , Positivity of Incoming Message = ", senti)
    p = indicoio.personality(str(msg))
    d = []
    d.append([
        p['agreeableness'], p['conscientiousness'], p['extraversion'],
        p['openness'], msg.sentiment.polarity, msg.sentiment.subjectivity
    ])
    traits = pd.DataFrame(d,
                          columns=[
                              'agreeableness', 'conscientiousness',
                              'extraversion', 'openness', 'polarity',
                              'subjectivity'
                          ])
    print(profanity.contains_profanity(str(msg)), " Profanity")
    print(profanity.censor(str(msg)))
    print("Summarizing this message =", msg.noun_phrases)
    percent = pd.DataFrame(prob, columns=["% HAM", "%SPAM"])
    print(traits)
    print(percent)
    def getPersonality(self, strArray):

        result = indicoio.personality(strArray)

        extraversion = []
        openness = []
        agreeableness = []
        conscientiousness = []

        for things in result:
            extraversion.append(things["extraversion"])
            openness.append(things["openness"])
            agreeableness.append(things["agreeableness"])
            conscientiousness.append(things["conscientiousness"])

        t = [extraversion, openness, agreeableness, conscientiousness]

        return [extraversion, openness, agreeableness, conscientiousness]
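Called with a batch, getPersonality returns four parallel lists with one float per input text. A hedged usage sketch (the analyzer object and its setup are assumed, not shown in the snippet):

texts = ["I love my friends!", "Quiet night in with a book."]
extraversion, openness, agreeableness, conscientiousness = analyzer.getPersonality(texts)
# each list holds len(texts) scores, aligned with the input order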
Example #14
def process_response(response):
    print(str(response))
    questionType = response.question_id
    #questionType = Question.query.get(response.question_id);
    #print(str(question));
    #questionType = QuestionType.query.get(question.type_id);
    print("processing a " + str(questionType))
    result = {}
    user_input = response.response_text
    if (questionType == "Personality"):
        result = indicoio.personality(user_input)
    elif (questionType == "Emotion"):
        result = indicoio.emotion(user_input)
    elif (questionType == "Persona"):
        result = indicoio.personas(user_input)
    pprint(result)
    max_result = max(result, key=result.get)
    print("max response: " + max_result)
    return "" + max_result
    def getOverallResult(self, strArray):

        result = indicoio.personality(strArray)

        extraversion = []
        openness = []
        agreeableness = []
        conscientiousness = []

        for things in result:
            extraversion.append(things["extraversion"])
            openness.append(things["openness"])
            agreeableness.append(things["agreeableness"])
            conscientiousness.append(things["conscientiousness"])

        result = indicoio.political(strArray)

        libertarian = []
        green = []
        liberal = []
        conservative = []

        for things in result:
            libertarian.append(things["Libertarian"])
            green.append(things["Green"])
            liberal.append(things["Liberal"])
            conservative.append(things["Conservative"])

        result = indicoio.sentiment(strArray)

        t = [
            result,
            libertarian,
            green,
            liberal,
            conservative,
            extraversion,
            openness,
            agreeableness,
            conscientiousness,
        ]

        return t
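getOverallResult returns a flat list t ordered as [sentiment, libertarian, green, liberal, conservative, extraversion, openness, agreeableness, conscientiousness]; sentiment is the raw indicoio.sentiment result for strArray (a list of scores when strArray is a list), and every other entry is a per-text list aligned with the input order.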
def gimme_the_goods(text, tag_count=3, persona_count=3):

    # Consume some of that api for analysis
    sentiment = indicoio.sentiment(text)
    # TODO figure out a better way to handle this bug
    political = indicoio.political(text[0:1100])
    personality = indicoio.personality(text)
    personas = indicoio.personas(text)
    tags = indicoio.text_tags(text, top_n=tag_count)

    # Sort the personas to grab top ones
    top_personas = dict(
        sorted(personas.items(), key=operator.itemgetter(1),
               reverse=True)[:persona_count])

    # Truncate the values to 3 decimals for cleanliness
    roundness = 3
    sentiment = truncate_values(sentiment, roundness)
    political = truncate_values(political, roundness)
    personality = truncate_values(personality, roundness)
    top_personas = truncate_values(top_personas, roundness)
    tags = truncate_values(tags, roundness)

    # Rearrange the personas a bit
    final_personas = []
    for key, value in top_personas.items():
        final_personas.append({
            'type': persona_mapping[key],
            'name': key,
            'value': value,
        })

    return_dict = {
        'sentiment': sentiment,
        'political': political,
        'personality': personality,
        'personas': final_personas,
        'tags': tags
    }

    return return_dict
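gimme_the_goods depends on a truncate_values helper and a persona_mapping lookup defined elsewhere. A minimal sketch under the obvious assumptions: truncate_values rounds every value of a score dict, and persona_mapping maps each of the sixteen indico personas to a broader group (the two entries below are placeholders, not the project's actual table):

def truncate_values(scores, digits):
    """Round every value in a score dict to the given number of decimals."""
    return {key: round(value, digits) for key, value in scores.items()}


# Placeholder entries only; the real mapping covers all sixteen personas
persona_mapping = {
    "advocate": "diplomat",
    "commander": "analyst",
}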
Example #18
def main_func(name):
    global data, personality
    csv_name = name + '_tweets.csv'
    if not os.path.isfile(csv_name):
        get_all_tweets(name)
    with open(csv_name) as file:
        reader = csv.reader(file)
        count = 0
        for row in reader:
            if len(row) > 0:
                text = row[2][2:-1]
                #print(text)
                if "https" in text:
                    continue

                data.append(text)
                count += 1
            if count > 10:
                break

    options = {
        'conscientiousness': cons,
        'openness': ope,
        'agreeableness': agr,
        'extraversion': extra
    }

    t0 = time()
    for i in data:

        mini_dic = indicoio.personality(i)

        result = max(mini_dic.keys(), key=(lambda k: mini_dic[k]))
        if result in options:
            options[result]()
        else:
            pass

    the_personality = max(personality.keys(), key=(lambda k: personality[k]))
    clear()
    return the_personality
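main_func dispatches through a dict of counter callbacks (cons, ope, agr, extra) and a global personality tally that are defined elsewhere in that script; a minimal sketch, with names taken from the example and bodies assumed:

# Sketch of the globals main_func assumes (names from the example, bodies assumed)
data = []
personality = {"conscientiousness": 0, "openness": 0, "agreeableness": 0, "extraversion": 0}


def cons():
    personality["conscientiousness"] += 1


def ope():
    personality["openness"] += 1


def agr():
    personality["agreeableness"] += 1


def extra():
    personality["extraversion"] += 1


def clear():
    for trait in personality:
        personality[trait] = 0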
Example #19
def initialize_events_by_personality():
    '''
    Scrape the most recent events from Waterloo Open Data and use sentiment analysis
    to determine the personality trait of each event based on its description.
    '''
    now = datetime.datetime.now()
    event_info, all_events, event_names, processed = [], [], [], []

    # The personality traits that are possible outcomes of the sentiment analysis algorithm:
    coae = ["conscientiousness", "openness", "agreeableness", "extraversion"]

    url = 'http://maps.waterloo.ca/OpenData/events.csv'
    with urllib.request.urlopen(url) as file:
        reader = csv.reader(file)
        next(reader)

        for index, row in enumerate(reader):
            # skips over empty rows of data
            if len(row) > 0:
                # Clean up: Category, Event Description and Name that contain embedded html tags
                Category = row[5].replace('<p>',
                                          "").replace('</p>',
                                                      "").replace("&quot", "")
                Description = row[7].replace('<p>',
                                             "").replace('</p>', "").replace(
                                                 "&quot", "")
                Name = row[13].replace('<p>',
                                       "").replace('</p>',
                                                   "").replace("&quot", "")

                # Clean up date information
                dateArr = row[2][:10].split("/")

                Month = int(dateArr[0])
                Day = int(dateArr[1])
                Year = int(dateArr[2])

                Date = (Day, Month, Year)

                # If the event has already occurred, skip the event
                if not (Year >= now.year and Month >= now.month
                        and Day >= now.day):
                    continue
                try:
                    event_info.extend([Name, Category, Description, Date])

                    # If the event is already accounted for, clear event_info and go to the next row
                    if Name in event_names:
                        event_info = []
                        continue
                    else:
                        event_names.append(Name)
                        all_events.append(event_info)
                        event_info = []

                except UnicodeEncodeError:
                    pass

            # As this is a csv file, a stopping row should be specified or reader will iterate over empty rows
            if index > 600:
                break

    for event in all_events:
        try:
            if event[2] == '':  # if no description is present
                continue

            # Use sentiment analysis to determine personality of event
            mini_dic = indicoio.personality(str(event[2]))
            result = max(mini_dic.keys(), key=(lambda k: mini_dic[k]))

            # _ -> eventName, emotion, type of event, description, date tuple(day month year)
            _ = (event[0], result, event[1], event[2], event[3])
            processed.append(_)

        except UnicodeEncodeError:
            pass

    def createCSV():
        name = coae.pop(-1)
        label = name + ".csv"
        # Create four CSV files containing the events corresponding to the personality types
        with open(label, 'w', newline='') as f:
            thewriter = csv.writer(f)
            thewriter.writerow([
                "Event Name", "Personality Type", "Type of Event",
                "Description", "Day", "Month", "Year"
            ])
            for row in processed:
                if row[1] == name:
                    thewriter.writerow([
                        row[0], row[1], row[2], row[3], row[4][0], row[4][1],
                        row[4][2]
                    ])

    # Initialize threads to create an events CSV file for each personality
    t1 = threading.Thread(name='con', target=createCSV)
    t2 = threading.Thread(name='open', target=createCSV)
    t3 = threading.Thread(name='agre', target=createCSV)
    t4 = threading.Thread(name='extra', target=createCSV)

    threads = [t1] + [t2] + [t3] + [t4]

    for x in threads:
        x.start()

    for x in threads:
        x.join()
Example #20
 def personality_measure(self, text):
     return indicoio.personality(text)
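As the test examples above show, passing a single string to indicoio.personality yields one dict with the four trait scores, while passing a list yields one such dict per input. A minimal end-to-end call (the key is a placeholder):

import indicoio

indicoio.config.api_key = "YOUR_INDICO_API_KEY"  # placeholder key

single = indicoio.personality("I love my friends!")
# dict with 'extraversion', 'openness', 'agreeableness', 'conscientiousness' floats

batch = indicoio.personality(["I love my friends!", "Leave me alone."])
# list with one such dict per input string, in input order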
Example #21
                count += 1

            except UnicodeEncodeError:
                pass

        if count > 600:
            break

processed = []
for el in bigger_list:
    try:
        #print("THIS IS ITTTTT '" +str(el[2]) + "'")
        if el[2] == '':
            continue
        mini_dic = indicoio.personality(str(el[2]))
        result = max(mini_dic.keys(), key=(lambda k: mini_dic[k]))
        #el.append[result]
        # eventName, emotion, type of event, description, date tuple(day month year)
        # index 0, 1, 2, 3, 4[0], 4[1], 4[2]
        _in = (el[0], result, el[1], el[2], el[3])
        # pairs.append(el[0])
        processed.append(_in)
        print(_in, '\n')
    except UnicodeEncodeError:
        pass

print(len(processed))


def createCSV():
Example #22
                     st._json['entities']['urls'][i]['expanded_url'])
             urls.append((count - 15.12200866779725) / 11.892945461889907)
     except:
         pass
 pers_values = []
 for elem in statuses:
     txt = elem.text
     words = txt.split(' ')
     for word in words[::-1]:
         if '@' in word:
             words.remove(word)
         elif 'http' in word:
             words.remove(word)
     if words:
         try:
             pers = indicoio.personality(' '.join(words))
             pers_values.append(pers)
         except Exception as e:
             print(e)
 op = np.mean([pers_values[i]['openness'] for i in range(len(pers_values))])
 ag = np.mean(
     [pers_values[i]['agreeableness'] for i in range(len(pers_values))])
 ex = np.mean(
     [pers_values[i]['extraversion'] for i in range(len(pers_values))])
 co = np.mean(
     [pers_values[i]['conscientiousness'] for i in range(len(pers_values))])
 url = np.mean(urls)
 bl_info = listing.get_fake_site_info(website[0])
 if bl_info == 0:
     person = 1
 else:
Example #23
                         db="nytimes")
    cur = db.cursor()

    sentimentValues = []
    politicalValues = []
    personalityValues = []
    emotionValues = []
    start = 3474
    itr = start
    for i in range(start, num + start):
        print("starting chunk " + str(itr) + " !")
        itr += 1
        curText = allText[i][:1000]
        sentimentValues = indicoio.sentiment_hq(curText)
        politicalValues = indicoio.political(curText)
        personalityValues = indicoio.personality(curText)
        emotionValues = indicoio.emotion(curText)
        abst = repr(allText[i]).replace("'", '').replace('"', '')
        SQLline = 'INSERT INTO `article`(`abst`, `url`, `sent`, `serv`, `gree`, `libe`, `libt`, `agre`, `cons`, `extr`, `open`, `ange`, `fear`, `joyy`, `sadd`, `surp`) VALUES ("' + abst + '"  ,"' + repr(
            articles[i]["url"]) + '",' + str(sentimentValues) + ',' + str(
                politicalValues["Conservative"]
            ) + ',' + str(politicalValues["Green"]) + ',' + str(
                politicalValues["Liberal"]
            ) + ',' + str(politicalValues["Libertarian"]) + ',' + str(
                personalityValues["agreeableness"]
            ) + ',' + str(personalityValues["conscientiousness"]) + ',' + str(
                personalityValues["extraversion"]) + ',' + str(
                    personalityValues["openness"]) + ',' + str(
                        emotionValues["anger"]) + ',' + str(
                            emotionValues["fear"]) + ',' + str(
                                emotionValues["joy"]) + ',' + str(
Example #24
    for word in nltk.word_tokenize(sent.lower()):
        if word in word_frequencies.keys():
            if len(sent.split(' ')) < 30:
                if sent not in sentence_scores.keys():
                    sentence_scores[sent] = word_frequencies[word]
                else:
                    sentence_scores[sent] += word_frequencies[word]

summary_sentences = heapq.nlargest(len(sentences),
                                   sentence_scores,
                                   key=sentence_scores.get)

video_info["summary_variable"] = summary_sentences

politicalValues = indicoio.political(text)
personalityValues = indicoio.personality(text)
emotionValues = indicoio.emotion(text)

video_info["political"] = politicalValues
video_info["personality"] = personalityValues
video_info["emotion"] = emotionValues
video_info["url"] = url


class MyEncoder(json.JSONEncoder):
    def default(self, obj):
        if isinstance(obj, numpy.integer):
            return int(obj)
        elif isinstance(obj, numpy.floating):
            return float(obj)
        elif isinstance(obj, numpy.ndarray):
Example #25
def info(yt_url):
    video_info = {}

    url = "https://www.youtube.com/watch?v=" + yt_url
    yt = YouTube(url)

    video_info["timestamped"] = []

    # get the audio file
    a = yt.captions.get_by_language_code('en')
    caps = a.generate_srt_captions()
    caps = caps.split("\n\n")
    caps = [i.split("\n") for i in caps]
    text = ""

    for i in caps:
        for j in i[2:]:
            text += j
            line = " ".join(i[2:])
            line = re.sub(r"<[^<]+?>", '', line)
            try:
                video_info["timestamped"].append([
                    i[1].split(" --> ")[0],
                    i[1].split(" --> ")[1],
                    line
                ])
            except:
                pass

    text = re.sub(r"<[^>]*>", " ", text)
    text = re.sub(r'\s+', ' ', text)
    text = re.sub(r"<[^<]+?>", '', text)
    text = text.replace("...", ".")
    text = text.replace("…", "")
    text = text.replace(".", ". ")
    text = re.sub(r'\s+', ' ', text)
    sentences = nltk.sent_tokenize(text)
    video_info["full_transcript"] = text
    stopwords = nltk.corpus.stopwords.words('english')

    word_frequencies = {}
    for word in nltk.word_tokenize(text):
        if word not in stopwords:
            if word not in word_frequencies.keys():
                word_frequencies[word] = 1
            else:
                word_frequencies[word] += 1

    maximum_frequency = max(word_frequencies.values())
    for word in word_frequencies.keys():
        word_frequencies[word] = (word_frequencies[word] / maximum_frequency)

    sentence_scores = {}
    for sent in sentences:
        for word in nltk.word_tokenize(sent.lower()):
            if word in word_frequencies.keys():
                if len(sent.split(' ')) < 30:
                    if sent not in sentence_scores.keys():
                        sentence_scores[sent] = word_frequencies[word]
                    else:
                        sentence_scores[sent] += word_frequencies[word]

    summary_sentences = heapq.nlargest(len(sentences), sentence_scores, key=sentence_scores.get)

    video_info["summary_variable"] = summary_sentences

    politicalValues = indicoio.political(text)
    personalityValues = indicoio.personality(text)
    emotionValues = indicoio.emotion(text)

    video_info["political"] = politicalValues
    video_info["personality"] = personalityValues
    video_info["emotion"] = emotionValues
    video_info["sentiment"] = indicoio.sentiment(text)
    video_info["url"] = url

    class MyEncoder(json.JSONEncoder):
        def default(self, obj):
            if isinstance(obj, numpy.integer):
                return int(obj)
            elif isinstance(obj, numpy.floating):
                return float(obj)
            elif isinstance(obj, numpy.ndarray):
                return obj.tolist()
            else:
                return super(MyEncoder, self).default(obj)

    return json.dumps(video_info, cls=MyEncoder)
Example #26
def execute(USERNAME, target, refresh):

    r_data = io_helper.read_raw(USERNAME, target)

    og = sys.stdout
    fpath = io_helper.out_path(USERNAME, target)

    def analysis(raw='', limit=5, text='', percent=True):
        global meta_dict
        # print lines if input is a list of non-dicts
        # if input is list of dicts, merge dicts and resend to analysis
        if isinstance(raw, list):
            for item in raw:
                if not isinstance(item, dict):
                    print(item)
                else:
                    create_meta_dict(item)
            analysis(meta_dict, limit, text, percent)

        # if input is dict: print k, v pairs
        # optional args for return limit and description text
        if isinstance(raw, dict):
            print(text)
            ct = 0
            for v in sorted(raw, key=raw.get, reverse=True):
                ct += 1
                if ct > limit: break
                if isinstance(raw[v], float):
                    if percent: per = r'%'
                    else: per = ''
                    print("    " + v, str(round(raw[v] * 100, 2)) + per)
                else:
                    print(v, raw[v])
            print()

    def create_meta_dict(item):
        # merge list of dicts into master dict
        global meta_dict
        meta_dict[item['text']] = item['confidence']
        return meta_dict

    rClean = ''
    for i in range(len(r_data)):
        if r_data[i - 1] == '\\':
            rClean = rClean[:-1]
            if r_data[i] != "'":
                continue

        if r_data[i] == '*':
            rClean += ' '
        else:
            rClean += r_data[i]

    r_data = rClean
    del rClean
    indicoio.config.api_key = keycheck.get_key()

    # Big 5
    big5 = {
        'text': "Big 5 personality inventory matches: ",
        "payload": indicoio.personality(r_data)
    }

    # Myers-Briggs
    mbtiLabels = indicoio.personas(r_data)
    mbti_dict = {
        'architect': 'intj',
        'logician': 'intp',
        'commander': 'entj',
        'debater': 'entp',
        'advocate': 'infj',
        'mediator': 'infp',
        'protagonist': 'enfj',
        'campaigner': 'enfp',
        'logistician': 'istj',
        'defender': 'isfj',
        'executive': 'estj',
        'consul': 'esfj',
        'virtuoso': 'istp',
        'adventurer': 'isfp',
        'entrepreneur': 'estp',
        'entertainer': 'esfp'
    }

    def replace_mbti():
        for k, v in mbtiLabels.items():
            k = k.replace(k, mbti_dict[k])
            yield k

    k = (list(replace_mbti()))
    v = map(lambda x: x, mbtiLabels.values())
    payload = (dict(zip(k, v)))

    mbti = {
        'text': "Most likely personalilty styles: ",
        "payload": payload,
        'ct': 5,
        'percent': True
    }

    # Political
    pol = {
        'text': "Political alignments: ",
        "payload": indicoio.political(r_data, version=1)
    }
    # Sentiment
    sen = {
        'text': "Sentiment: ",
        "payload": {
            'Percent positive': indicoio.sentiment(r_data)
        },
        'ct': 3
    }

    # Emotion
    emo = {
        'text': "Predominant emotions:",
        "payload": indicoio.emotion(r_data),
        'ct': 5
    }

    # Keywords
    kw = {'text': "Keywords: ", "payload": indicoio.keywords(r_data), 'ct': 5}
    # Text tags
    tt = {
        'text': "Text tags: ",
        "payload": indicoio.text_tags(r_data),
        'ct': 10
    }
    # Place
    pla = {
        'text': "Key locations: ",
        'payload': indicoio.places(r_data, version=2),
        'ct': 3,
        'percent': True
    }

    def Karma(USERNAME):
        import praw
        import collections
        kList = []
        user_agent = ("N2ITN")
        r = praw.Reddit(user_agent=user_agent)
        thing_limit = 100

        user = r.get_redditor(USERNAME)
        gen = user.get_submitted(limit=thing_limit)
        karma_by_subreddit = {}
        for thing in gen:
            subreddit = thing.subreddit.display_name
            karma_by_subreddit[subreddit] = (
                karma_by_subreddit.get(subreddit, 0) + thing.score)

        for w in sorted(karma_by_subreddit,
                        key=karma_by_subreddit.get,
                        reverse=True):
            kList.append(str(w) + ': ' + str(karma_by_subreddit[w]))
        kList.insert(0, 'Karma by Sub')

        print("\n\t".join(kList[:10]))

    def show(results):
        # Accepts bag of dicts, or single dict
        if not isinstance(results, dict):
            for X in results:
                show(X)
        else:
            if results == pla and pla['payload'] == []:
                print("Not enough information to infer place of origin")
                print()
            else:

                i = results
                analysis(raw=i.get('payload', ''),
                         limit=i.get('ct', 5),
                         text=i.get('text', ''),
                         percent=i.get('percent', True))

    with open(fpath, 'w') as outtie:
        sys.stdout = outtie
        print(target + USERNAME)
        print()
        show([kw, pla, big5, emo, sen, pol, mbti, tt])
        Karma(USERNAME)

        sys.stdout = og
    return
Example #27
#CALL INDICO.IO PERSONALITY API

import indicoio
indicoio.config.api_key = 'ba420e48e2322e7e99b674c9d1d3a5d2'

extraversion_array = []
openness_array = []
agreeableness_array = []
conscientiousness_array = []
error2 = []

for y in range(0, 3000):

    try:
        result = indicoio.personality(array[y])

        data_string = json.dumps(result)
        decoded_json = json.loads(data_string)

        extraversion_result = str(decoded_json["extraversion"] * 100)
        openness_result = str(decoded_json["openness"] * 100)
        agreeableness_result = str(decoded_json["agreeableness"] * 100)
        conscientiousness_result = str(decoded_json["conscientiousness"] * 100)

        extraversion_array.append(extraversion_result)
        openness_array.append(openness_result)
        agreeableness_array.append(agreeableness_result)
        conscientiousness_array.append(conscientiousness_result)
        print "FINISH " + str(y)
    except KeyError:
Example #28
def eventPersonality(text):
    return indicoio.personality(text)
Example #29
		if len(row) > 0:
			text = row[2][2:-1]
			#print(text)

			data.append(text)
			count +=1
			
		

		if count > 8:
			break

personality =  {'conscientiousness': 0, 'openness': 0, 'agreeableness': 0, 'extraversion':0} 
options = {'conscientiousness': cons, 'openness': ope, 'agreeableness': agr, 'extraversion':extra} 
for i in data:
    mini_dic = indicoio.personality(i)
    result = max(mini_dic, key=mini_dic.get)
    if result in options:
        options[result]()
    else:
        pass
    print_count += 1

    if print_count > 5:
        break

print("Trump is a {} person".format(max(personality, key=personality.get)))
		
def cons():
	global personality 
Example #30
    def evaluate(self, data_list):
        gstart = time.time()
        tweets = []
        articles = []
        tweets_text = []
        tweets_username = []
        for _, data in enumerate(data_list):
            if isinstance(data, str):
                tweets.append(
                    self.api.get_status(data.split("/")[-1],
                                        tweet_mode='extended'))
            else:
                tweets.append(data)
        for element in tweets:
            articles_urls = []
            for _ in range(len(element._json['entities']['urls'])):
                articles_urls.append(
                    element._json['entities']['urls'][_]['expanded_url'])
            tweets_text.append(element._json['full_text'])
            tweets_username.append(element._json['user']['id'])
            articles.extend(articles_urls)

        score = []
        user_eval = []
        article_eval = []
        page_quality = []
        urls = []
        final = []
        start = time.time()
        for us in tweets_username:
            statuses = self.api.user_timeline(us, count=50)
            pers_values = []
            for elem in statuses:
                txt = elem.text
                words = txt.split(' ')
                for word in words[::-1]:
                    if '@' in word:
                        words.remove(word)
                    elif 'http' in word:
                        words.remove(word)
                if words:
                    try:
                        pers = indicoio.personality(' '.join(words))
                        pers_values.append(pers)
                    except Exception as e:
                        print(e)
            op = np.mean(
                [pers_values[i]['openness'] for i in range(len(pers_values))])
            ag = np.mean([
                pers_values[i]['agreeableness']
                for i in range(len(pers_values))
            ])
            ex = np.mean([
                pers_values[i]['extraversion'] for i in range(len(pers_values))
            ])
            co = np.mean([
                pers_values[i]['conscientiousness']
                for i in range(len(pers_values))
            ])
            print([op, ag, ex, co])
            user_eval.append(
                self.personality_clf.predict_proba([[ag, co, ex, op]])[0][0])
        print('personality: {}'.format(time.time() - start))

        for element in articles:
            start = time.time()
            url = unshorten_url(element)
            urls.append(url)
            print("unshortening: {}".format(time.time() - start))
            start = time.time()
            count = self.banner_counter.iframe_detector(
                url) + self.banner_counter.count_ads_th(url)
            page_quality.append(
                (count - 15.12200866779725) / 11.892945461889907)
            print("banner counter: {}".format(time.time() - start))
            start = time.time()
            bl_info = listing.get_fake_site_info(url)
            if bl_info == 0:
                user_eval[0] = 1
            print("blacklisting: {}".format(time.time() - start))
            start = time.time()
            txt = scrape(url)
            print(txt)
            print("scraping: {}".format(time.time() - start))
            start = time.time()
            features = ml.extract_features(txt)
            features.append(count)
            features = np.asarray(features).reshape(1, -1)
            print("svm: {}".format(time.time() - start))
            start = time.time()
            res = self.clf.predict_proba(features)
            print("prediction: {}".format(time.time() - start))
            article_eval.append(res[0][0])
            final.append(
                self.log_reg.predict_proba(
                    [[user_eval[0], article_eval[0], page_quality[0]]])[0][0])
            print("total time: {}".format(time.time() - gstart))

            print('article eval vs final score: {} vs {}'.format(
                article_eval[0], final[0]))

        score.append({  # "tweet_url": element,  # string
            "article_url": urls[0],  # string
            "page_quality": page_quality[0],  # string
            "text_evaluation": article_eval[0],  # string
            "source_reliability": user_eval[0],  # string
            "final_score": article_eval[0]
        })
        return json.dumps(score)
Example #31
def execute(USERNAME, target, refresh):

    r_data = io_helper.read_raw(USERNAME, target)

    og = sys.stdout
    fpath = io_helper.out_path(USERNAME, target)

    def analysis(raw='', limit=5, text='', percent=True):
        global meta_dict
        # print lines if input is a list of non-dicts
        # if input is list of dicts, merge dicts and resend to analysis
        if isinstance(raw, list):
            for item in raw:
                if not isinstance(item, dict):
                    print(item)
                else:
                    create_meta_dict(item)
            analysis(meta_dict, limit, text, percent)

        # if input is dict: print k, v pairs
        # optional args for return limit and description text
        if isinstance(raw, dict):
            print(text)
            ct = 0
            for v in sorted(raw, key=raw.get, reverse=True):
                ct += 1
                if ct > limit: break
                if isinstance(raw[v], float):
                    if percent: per = r'%'
                    else: per = ''
                    print("    " + v, str(round(raw[v] * 100, 2)) + per)
                else:
                    print(v, raw[v])
            print()

    def create_meta_dict(item):
        # merge list of dicts into master dict
        global meta_dict
        meta_dict[item['text']] = item['confidence']
        return meta_dict

    rClean = ''
    for i in range(len(r_data)):
        if r_data[i - 1] == '\\':
            rClean = rClean[:-1]
            if r_data[i] != "'":
                continue

        if r_data[i] == '*':
            rClean += ' '
        else:
            rClean += r_data[i]

    r_data = rClean
    del rClean
    indicoio.config.api_key = keycheck.get_key()

    # Big 5
    big5 = {'text': "Big 5 personality inventory matches: ", "payload": indicoio.personality(r_data)}

    # Myers-Briggs
    mbtiLabels = indicoio.personas(r_data)
    mbti_dict = {
        'architect': 'intj',
        'logician': 'intp',
        'commander': 'entj',
        'debater': 'entp',
        'advocate': 'infj',
        'mediator': 'infp',
        'protagonist': 'enfj',
        'campaigner': 'enfp',
        'logistician': 'istj',
        'defender': 'isfj',
        'executive': 'estj',
        'consul': 'esfj',
        'virtuoso': 'istp',
        'adventurer': 'isfp',
        'entrepreneur': 'estp',
        'entertainer': 'esfp'
    }

    def replace_mbti():
        for k, v in mbtiLabels.items():
            k = k.replace(k, mbti_dict[k])
            yield k

    k = (list(replace_mbti()))
    v = map(lambda x: x, mbtiLabels.values())
    payload = (dict(zip(k, v)))

    mbti = {'text': "Most likely personality styles: ", "payload": payload, 'ct': 5, 'percent': True}

    # Political
    pol = {'text': "Political alignments: ", "payload": indicoio.political(r_data, version=1)}
    # Sentiment
    sen = {'text': "Sentiment: ", "payload": {'Percent positive': indicoio.sentiment(r_data)}, 'ct': 3}

    # Emotion 
    emo = {'text': "Predominant emotions:", "payload": indicoio.emotion(r_data), 'ct': 5}

    # Keywords
    kw = {'text': "Keywords: ", "payload": indicoio.keywords(r_data), 'ct': 5}
    # Text tags
    tt = {'text': "Text tags: ", "payload": indicoio.text_tags(r_data), 'ct': 10}
    # Place
    pla = {
        'text': "Key locations: ",
        'payload': indicoio.places(r_data, version=2),
        'ct': 3,
        'percent': True
    }

    def Karma(USERNAME):
        import praw
        import collections
        kList = []
        user_agent = ("N2ITN")
        r = praw.Reddit(user_agent=user_agent)
        thing_limit = 100

        user = r.get_redditor(USERNAME)
        gen = user.get_submitted(limit=thing_limit)
        karma_by_subreddit = {}
        for thing in gen:
            subreddit = thing.subreddit.display_name
            karma_by_subreddit[subreddit] = (karma_by_subreddit.get(subreddit, 0) + thing.score)

        for w in sorted(karma_by_subreddit, key=karma_by_subreddit.get, reverse=True):
            kList.append(str(w) + ': ' + str(karma_by_subreddit[w]))
        kList.insert(0, 'Karma by Sub')

        print("\n\t".join(kList[:10]))

    def show(results):
        # Accepts bag of dicts, or single dict
        if not isinstance(results, dict):
            for X in results:
                show(X)
        else:
            if results == pla and pla['payload'] == []:
                print("Not enough information to infer place of origin")
                print()
            else:

                i = results
                analysis(
                    raw=i.get('payload', ''),
                    limit=i.get('ct', 5),
                    text=i.get('text', ''),
                    percent=i.get('percent', True)
                )

    with open(fpath, 'w') as outtie:
        sys.stdout = outtie
        print(target + USERNAME)
        print()
        show([kw, pla, big5, emo, sen, pol, mbti, tt])
        # Karma(USERNAME)

        sys.stdout = og
    return