def Read_ownerTweets(file):
    """Read flume JSON tweet strings and build N-D model data points.

    For every line that is a retweet, extracts the original (owner) tweet and
    computes numeric features:
      V1 -- (retweet_count - owner_follower_count) / retweet_count, floored at 0
      V2/V3/V4 -- positive / negative / neutral AFINN sentiment scores
      V5 -- Shannon entropy of the cleaned tweet words (-sum pi * log2 pi)

    :param file: filename containing one flume JSON tweet string per line
    :return: (totalTweets, OriginaltweetMap) where OriginaltweetMap maps
             "<ownerName>,<timestamp>" to [ownerName, ownerTweetTimeStamp,
             ownerFollercount, ownerretweetcount, text, V1, V2, V3, V4, V5]
    """
    # NOTE(review): x and y are loop variables deliberately leaked into module
    # globals by the original code; kept for backward compatibility.
    global x, y, totalTweets, ownertweets
    # Close the file deterministically instead of leaking the handle.
    with open(file, encoding="utf8") as fileObject:
        Lines = fileObject.readlines()
    totalTweets = len(Lines)
    for line in Lines:
        try:
            parsed_json_tweets = json.loads(line)
            if 'retweeted_status' not in parsed_json_tweets:
                continue
            retweeted = parsed_json_tweets['retweeted_status']
            ownertweets += 1
            ownerName = retweeted['user']['screen_name'].strip()
            ownerTweetTimeStamp = retweeted['created_at'].strip()
            ownerFollercount = retweeted['user']['followers_count']
            ownerretweetcount = retweeted['retweet_count']
            # Prefer the untruncated extended tweet; fall back to 'text'.
            # Narrowed from a bare except: only a missing key is expected here.
            try:
                Owner_tweet_text = retweeted['extended_tweet']['full_text'].strip()
            except KeyError:
                Owner_tweet_text = retweeted['text'].strip()

            # Step 1: V1, clamped to be non-negative.
            V1 = 0
            if int(ownerretweetcount) > 0:
                V1 = (int(ownerretweetcount) - int(ownerFollercount)) / int(ownerretweetcount)
            if V1 < 0:
                V1 = 0

            # Step 2: word-level AFINN sentiment scores.
            score = get_afinn_scores(Owner_tweet_text)
            V2 = int(score['positive'])
            V3 = int(score['negative'])
            V4 = int(score['neutral'])

            # Step 3: entropy of the word distribution, -sum(pi * log2 pi).
            words = clean(Owner_tweet_text)
            wordLength = len(words)
            EachWordCount = Counter(words)
            p = []
            for x, y in EachWordCount.items():
                pi = y / wordLength
                p.append(pi * math.log(pi, 2))
            V5 = -sum(p)

            OriginaltweetMap[ownerName + "," + ownerTweetTimeStamp] = [
                ownerName, ownerTweetTimeStamp, ownerFollercount,
                ownerretweetcount, Owner_tweet_text.replace('\n', ''),
                V1, V2, V3, V4, V5]
        except ValueError:
            # Skip lines that are not valid JSON.
            continue
    return totalTweets, OriginaltweetMap
def Read_ownerTweets(file):
    """Read flume JSON tweet strings and compute OTC components per owner tweet.

    NOTE(review): this redefines `Read_ownerTweets` and silently shadows the
    earlier definition of the same name in this module -- confirm which one is
    intended and rename one of them.

    For every retweet line, extracts the original (owner) tweet and computes:
      V1MI        -- mean bigram mutual information (0 when the tweet is short)
      objectivity -- 1 - TextBlob subjectivity
      deception   -- lexicon 'deception' score plus the mean of eight related
                     category scores (money, hate, envy, crime, magic, fear,
                     lust, power) when those categories are present
    OTCnorm is [V1MI, objectivity, 1 - deception] normalized by its max, and
    recp is the mean magnitude of OTCnorm.

    :param file: filename containing one flume JSON tweet string per line
    :return: (totalTweets, OriginaltweetMap) where OriginaltweetMap maps
             "<ownerName>,<timestamp>" to [ownerName, ownerTweetTimeStamp,
             ownerFollercount, ownerretweetcount, text, OTCnorm, recp]
    """
    global x, y, totalTweets, ownertweets
    # Close the file deterministically instead of leaking the handle.
    with open(file, encoding="utf8") as fileObject:
        Lines = fileObject.readlines()
    totalTweets = len(Lines)
    for line in Lines:
        try:
            parsed_json_tweets = json.loads(line)
            # Removed dead reads of the retweeter's own fields (screen_name,
            # retweet_count, followers_count, text): they were never used and
            # could raise an uncaught KeyError on malformed tweets.
            if 'retweeted_status' not in parsed_json_tweets:
                continue
            retweeted = parsed_json_tweets['retweeted_status']
            ownertweets += 1
            ownerName = retweeted['user']['screen_name'].strip()
            ownerTweetTimeStamp = retweeted['created_at'].strip()
            ownerFollercount = retweeted['user']['followers_count']
            ownerretweetcount = retweeted['retweet_count']
            # Prefer the untruncated extended tweet; fall back to 'text'.
            # Narrowed from a bare except: only a missing key is expected here.
            try:
                Owner_tweet_text = retweeted['extended_tweet']['full_text'].strip()
            except KeyError:
                Owner_tweet_text = retweeted['text'].strip()

            Owner_tweet_text = Find(Owner_tweet_text)
            if Owner_tweet_text == "":
                continue

            wordlist = clean(Owner_tweet_text)
            EachWordCount = Counter(wordlist)

            # V1MI: average mutual information over word bigrams; only
            # meaningful when there are enough words to form bigrams.
            V1MI = 0
            if len(wordlist) > 3:
                bigrams = list(ngrams(wordlist, 2))
                bigramcounter = dict(Counter(bigrams))
                values = [mutualInformation(a, b, EachWordCount, bigramcounter)
                          for a, b in bigrams]
                if sum(values) != 0:
                    V1MI = float(sum(values) / len(values))

            V2sentiObject = TextBlob(Owner_tweet_text).sentiment
            V13 = lexicon.analyze(Owner_tweet_text, normalize=True)
            try:
                # BUGFIX: the original divided only V13['power'] by 8 due to
                # operator precedence; average all eight related categories.
                deception = V13['deception'] + (
                    V13['money'] + V13['hate'] + V13['envy'] + V13['crime'] +
                    V13['magic'] + V13['fear'] + V13['lust'] + V13['power']) / 8
            except KeyError:
                # Some categories absent from the lexicon result: fall back to
                # the bare deception score (narrowed from a bare except).
                deception = V13['deception']

            # Normalize by the max component; hoisted out of the comprehension
            # (the original recomputed max() for every element).
            components = [V1MI, 1 - V2sentiObject.subjectivity, 1 - deception]
            peak = max(components)
            OTCnorm = [float(c) / peak for c in components] if peak != 0 else []
            recp = abs(sum(OTCnorm) / 3)

            OriginaltweetMap[ownerName + "," + ownerTweetTimeStamp] = [
                ownerName, ownerTweetTimeStamp, ownerFollercount,
                ownerretweetcount, Owner_tweet_text.replace('\n', ''),
                OTCnorm, recp]
        except ValueError:
            # Skip lines that are not valid JSON.
            continue
    return totalTweets, OriginaltweetMap