Code example #1 (score: 0)
File: NNET_5d.py — Project: rajvadranam/Veracity
def Read_ownerTweets(file):
    """
    Read a file of flume JSON tweet strings and build N-D model data points.

    For every line that contains a retweet, computes five numeric features
    for the original (owner) tweet:
      V1 - (retweets - follower count) / retweets, clamped to >= 0
      V2 / V3 / V4 - positive / negative / neutral AFINN sentiment scores
      V5 - Shannon entropy of the cleaned tweet words, -sum(pi * log2 pi)

    :param file: filename containing flume JSON strings, one per line
    :return: tuple (totalTweets, OriginaltweetMap); the map is keyed by
             "ownerName,timestamp" and holds
             [name, timestamp, followers, retweets, text, V1, V2, V3, V4, V5]
    """
    # NOTE(review): the original also declared `global x, y` and then
    # clobbered those globals as loop variables; plain locals are used here.
    global totalTweets, ownertweets
    # `with` guarantees the handle is closed even if parsing raises.
    with open(file, encoding="utf8") as fileObject:
        lines = fileObject.readlines()
    totalTweets = len(lines)
    for line in lines:
        try:
            parsed = json.loads(line)
            if 'retweeted_status' not in parsed:
                continue
            retweeted = parsed['retweeted_status']
            ownertweets += 1
            ownerName = retweeted['user']['screen_name'].strip()
            ownerTweetTimeStamp = retweeted['created_at'].strip()
            ownerFollercount = retweeted['user']['followers_count']
            ownerretweetcount = retweeted['retweet_count']
            # Prefer the full (extended) text when present; fall back to the
            # truncated 'text' field.  Narrowed from a bare `except:`.
            try:
                Owner_tweet_text = retweeted['extended_tweet']['full_text'].strip()
            except (KeyError, TypeError):
                Owner_tweet_text = retweeted['text'].strip()

            # Step 1: V1 = (retweet count - owner follower count) / retweets,
            # clamped so a follower-heavy owner yields 0 rather than negative.
            V1 = 0
            if int(ownerretweetcount) > 0:
                V1 = (int(ownerretweetcount) - int(ownerFollercount)) / int(ownerretweetcount)
            if V1 < 0:
                V1 = 0

            # Step 2: AFINN-based sentiment tallies over the tweet words.
            score = get_afinn_scores(Owner_tweet_text)
            V2 = int(score['positive'])
            V3 = int(score['negative'])
            V4 = int(score['neutral'])

            # Step 3: Shannon entropy -sum(pi * log2 pi) over cleaned words.
            # An empty word list yields an empty Counter, so V5 stays 0 and
            # the division by wordLength is never reached.
            words = clean(Owner_tweet_text)
            wordLength = len(words)
            wordCounts = Counter(words)
            V5 = -sum((count / wordLength) * math.log2(count / wordLength)
                      for count in wordCounts.values())

            OriginaltweetMap[ownerName + "," + ownerTweetTimeStamp] = [
                ownerName, ownerTweetTimeStamp, ownerFollercount,
                ownerretweetcount, Owner_tweet_text.replace('\n', ''),
                V1, V2, V3, V4, V5]
        except ValueError:
            # Skip lines that are not valid JSON (json.JSONDecodeError is a
            # ValueError subclass).
            continue
    return totalTweets, OriginaltweetMap
Code example #2 (score: 0)
def Read_ownerTweets(file):
    """
    Read flume JSON tweet strings and build OTC components for retweets.

    For each retweet whose (URL-extracted) text is non-empty, computes:
      - V1MI: mean mutual information over the bigrams of the cleaned text
      - 1 - subjectivity (TextBlob sentiment)
      - 1 - deception (lexicon category scores)
    normalizes the three components by their maximum (OTCnorm) and averages
    the normalized values into a single score (recp).

    :param file: filename containing flume JSON strings, one per line
    :return: tuple (totalTweets, OriginaltweetMap); the map is keyed by
             "ownerName,timestamp" and holds
             [name, timestamp, followers, retweets, text, OTCnorm, recp]
    """
    # NOTE(review): the original declared `global x, y` without using them,
    # and read four per-tweet fields (screen_name, retweet_count, follower
    # count, text) that were never used — and whose missing keys would have
    # raised an uncaught KeyError.  Both removed.
    global totalTweets, ownertweets
    # `with` guarantees the handle is closed even if parsing raises.
    with open(file, encoding="utf8") as fileObject:
        lines = fileObject.readlines()
    totalTweets = len(lines)
    for line in lines:
        try:
            parsed = json.loads(line)
            if 'retweeted_status' not in parsed:
                continue
            retweeted = parsed['retweeted_status']
            ownertweets += 1
            ownerName = retweeted['user']['screen_name'].strip()
            ownerTweetTimeStamp = retweeted['created_at'].strip()
            ownerFollercount = retweeted['user']['followers_count']
            ownerretweetcount = retweeted['retweet_count']
            # Prefer the full (extended) text; fall back to truncated 'text'.
            # Narrowed from a bare `except:`.
            try:
                Owner_tweet_text = retweeted['extended_tweet']['full_text'].strip()
            except (KeyError, TypeError):
                Owner_tweet_text = retweeted['text'].strip()

            Owner_tweet_text = Find(Owner_tweet_text)
            if Owner_tweet_text == "":
                continue

            wordlist = clean(Owner_tweet_text)
            wordCounts = Counter(wordlist)

            # Mean mutual information over bigrams; texts of <= 3 words get
            # no bigrams and V1MI stays 0.
            bigrams = []
            bigramcounter = {}
            if len(wordlist) > 3:
                bigrams = list(ngrams(wordlist, 2))
                bigramcounter = dict(Counter(bigrams))
            mi_values = [mutualInformation(first, second, wordCounts, bigramcounter)
                         for first, second in bigrams]
            V1MI = 0
            if sum(mi_values) != 0:
                V1MI = float(sum(mi_values) / len(mi_values))

            sentiment = TextBlob(Owner_tweet_text).sentiment
            V13 = lexicon.analyze(Owner_tweet_text, normalize=True)
            try:
                # BUG FIX: the original wrote `... + V13['power'] / 8`, which
                # divided only the 'power' term by 8; the parenthesization
                # here averages all eight related categories as intended.
                deception = V13['deception'] + (
                    V13['money'] + V13['hate'] + V13['envy'] + V13['crime'] +
                    V13['magic'] + V13['fear'] + V13['lust'] + V13['power']) / 8
            except (KeyError, TypeError):
                deception = V13['deception']

            # Normalize the three components by their max (hoisted out of the
            # comprehension, where the original recomputed it per element).
            components = [V1MI, 1 - sentiment.subjectivity, 1 - deception]
            peak = max(components)
            OTCnorm = [] if peak == 0 else [c / peak for c in components]
            recp = abs(sum(OTCnorm) / 3)

            OriginaltweetMap[ownerName + "," + ownerTweetTimeStamp] = [
                ownerName, ownerTweetTimeStamp, ownerFollercount,
                ownerretweetcount, Owner_tweet_text.replace('\n', ''),
                OTCnorm, recp]
        except ValueError:
            # Skip lines that are not valid JSON.
            continue
    return totalTweets, OriginaltweetMap