def fill_IRA_info(self):
    """Merge the IRA tweet dump into ``self.tweets`` and label every tweet.

    Reads five columns of ``disk/ira_tweets_csv_hashed.csv`` as strings and,
    for each row:

    * if the row's ``tweetid`` is a key of ``self.tweets``, marks that tweet
      with ``is_IRA = 1``, fills ``user_id`` when it is still ``-1`` and
      fills ``dt`` when it still holds the ``"2000-01-01 00:00:00"``
      placeholder (the dump's ``tweet_time`` gets ``":00"`` appended);
    * if the row's ``retweet_tweetid`` is a key of ``self.tweets`` whose
      ``user_id`` is still ``-1``, fills it from ``retweet_userid``.

    Afterwards every tweet whose ``is_IRA`` is still ``-1`` is set to 1 or 0
    according to the ``Are_you_IRA`` checker. The number of tweets labelled
    as IRA is printed at the end.
    """
    # NOTE(review): the nesting below was reconstructed from a source whose
    # indentation had been collapsed -- confirm against the original file.
    detector = Are_you_IRA()
    print("补充IRA数据处理中 ...")
    ira_count = 0

    wanted_columns = [
        "tweetid",
        "userid",
        "tweet_time",
        "retweet_userid",
        "retweet_tweetid",
    ]
    ira_frame = pd.read_csv(
        "disk/ira_tweets_csv_hashed.csv",
        usecols=wanted_columns,
        dtype=str,
    )

    for _, record in tqdm(ira_frame.iterrows()):
        own_id = record["tweetid"]
        source_id = record["retweet_tweetid"]

        # The row is itself one of our tweets: it comes from the IRA dump.
        if own_id in self.tweets:
            author = detector.uncover(record["userid"])
            self.tweets[own_id]["is_IRA"] = 1
            if self.tweets[own_id]["user_id"] == -1:
                self.tweets[own_id]["user_id"] = author
            if self.tweets[own_id]["dt"] == "2000-01-01 00:00:00":
                self.tweets[own_id]["dt"] = record["tweet_time"] + ":00"
            ira_count += 1

        # The row retweets one of our tweets: recover that tweet's author.
        if source_id in self.tweets and self.tweets[source_id]["user_id"] == -1:
            original_author = detector.uncover(record["retweet_userid"])
            self.tweets[source_id]["user_id"] = original_author

    # Second pass: settle every tweet the dump did not label directly.
    for tweet_key in self.tweets.keys():
        if self.tweets[tweet_key]["is_IRA"] != -1:
            continue
        # NOTE(review): "f**k" looks like a masked method name; as written it
        # parses as ``detector.f ** k(...)`` -- confirm the real
        # Are_you_IRA method before running.
        if detector.f**k(self.tweets[tweet_key]["user_id"]):
            self.tweets[tweet_key]["is_IRA"] = 1
            ira_count += 1
        else:
            self.tweets[tweet_key]["is_IRA"] = 0

    print("Count of IRA tweets:", ira_count)
def get_ira_network_with_big_networks():
    """Filter the full link files down to links that involve an IRA account.

    For the mention, retweet, reply and quote link files under ``disk/``,
    every line whose second or third whitespace-separated field is flagged
    by the ``Are_you_IRA`` checker is copied unchanged into the matching
    ``disk/ira-*-links.txt`` file.
    """
    Putin = Are_you_IRA()

    def search_IRA(in_name, out_name):
        """Copy the lines of ``in_name`` that touch an IRA account to ``out_name``."""
        # The output is opened first, as before, so it is created/truncated
        # even when the input is missing. The input handle is now
        # context-managed as well; it used to be left open.
        with open(out_name, "w") as f, open(in_name) as src:
            for line in tqdm(src):
                w = line.strip().split()
                # NOTE(review): "f**k" looks like a masked method name; as
                # written it parses as ``Putin.f ** k(...)`` -- confirm the
                # real Are_you_IRA method before running.
                if Putin.f**k(w[1]) or Putin.f**k(w[2]):
                    f.write(line)

    search_IRA("disk/all-men-links.txt", "disk/ira-men-links.txt")
    search_IRA("disk/all-ret-links.txt", "disk/ira-ret-links.txt")
    search_IRA("disk/all-rep-links.txt", "disk/ira-rep-links.txt")
    search_IRA("disk/all-quo-links.txt", "disk/ira-quo-links.txt")
def get_network_with_ira():
    """Export the reply, quote, retweet and mention edges of the IRA tweets.

    Reads ``data/ira-tweets-ele.csv`` with every column as a string and
    writes four comma-separated text files under ``disk/``:

    * ``ira-rep.txt`` -- rows with a non-null ``in_reply_to_tweetid``;
    * ``ira-quo.txt`` -- rows with a non-null ``quoted_tweet_tweetid``;
      a row that raises while being written is skipped, its
      ``retweet_userid`` and ``in_reply_to_userid`` are printed, and the
      number of such rows is printed next to the total;
    * ``ira-ret.txt`` -- rows with a non-null ``retweet_tweetid``;
    * ``ira-men.txt`` -- one line per entry of ``user_mentions``.

    User ids are passed through ``Are_you_IRA.uncover`` before being written.
    """
    ira_tweets = pd.read_csv("data/ira-tweets-ele.csv", dtype=str)
    print("loaded ", len(ira_tweets))
    Putin = Are_you_IRA()

    # Columns of the loaded frame, as recorded by the original author:
    #   tweetid, userid, user_display_name, user_screen_name,
    #   user_reported_location, user_profile_description, user_profile_url,
    #   follower_count, following_count, account_creation_date,
    #   account_language, tweet_language, tweet_text, tweet_time,
    #   tweet_client_name, in_reply_to_tweetid, in_reply_to_userid,
    #   quoted_tweet_tweetid, is_retweet, retweet_userid, retweet_tweetid,
    #   latitude, longitude, quote_count, reply_count, like_count,
    #   retweet_count, hashtags, urls, user_mentions, poll_choices

    # All four outputs are context-managed so they are flushed and closed
    # even if one of the loops raises; previously they were never closed.
    with open("disk/ira-men.txt", "w") as men_file, \
            open("disk/ira-ret.txt", "w") as ret_file, \
            open("disk/ira-rep.txt", "w") as rep_file, \
            open("disk/ira-quo.txt", "w") as quo_file:

        rep_ira_tweets = ira_tweets[ira_tweets.in_reply_to_tweetid.notnull()]
        quo_ira_tweets = ira_tweets[ira_tweets.quoted_tweet_tweetid.notnull()]
        ret_ira_tweets = ira_tweets[ira_tweets.retweet_tweetid.notnull()]
        men_ira_tweets = ira_tweets[ira_tweets.user_mentions.notnull()]

        # Replies.
        rep_file.write("tweet_id,user_id,o_tweet_id,o_user_id\n")
        for _, row in rep_ira_tweets.iterrows():
            rep_file.write(",".join([
                row["tweetid"],
                Putin.uncover(row["userid"]),
                row["in_reply_to_tweetid"],
                Putin.uncover(row["in_reply_to_userid"]),
            ]) + "\n")

        # Quotes.
        cnt = 0
        quo_file.write("tweet_id,user_id,o_tweet_id,o_user_id\n")
        for _, row in quo_ira_tweets.iterrows():
            try:
                # NOTE(review): the quoted author is taken from
                # "retweet_userid"; the column list has no quoted-user
                # column -- confirm this is intended.
                quo_file.write(",".join([
                    row["tweetid"],
                    Putin.uncover(row["userid"]),
                    row["quoted_tweet_tweetid"],
                    Putin.uncover(row["retweet_userid"]),
                ]) + "\n")
            except Exception:
                # Best-effort export: report and count the row, then go on.
                # Narrowed from a bare ``except`` so Ctrl-C and SystemExit
                # are no longer swallowed.
                print(row["retweet_userid"])
                print(row["in_reply_to_userid"])
                cnt += 1
        print(len(quo_ira_tweets), cnt)

        # Retweets.
        ret_file.write("tweet_id,user_id,o_tweet_id,o_user_id\n")
        for _, row in ret_ira_tweets.iterrows():
            ret_file.write(",".join([
                row["tweetid"],
                Putin.uncover(row["userid"]),
                row["retweet_tweetid"],
                Putin.uncover(row["retweet_userid"]),
            ]) + "\n")

        # Mentions.
        # NOTE(review): the header names four columns but each row carries
        # three fields (tweet id, user id, mentioned user). Left unchanged
        # because downstream readers may rely on the current layout.
        men_file.write("tweet_id,user_id,to_tweet_id,to_user_id\n")
        for _, row in men_ira_tweets.iterrows():
            mentions = row["user_mentions"]
            # Drop the first and last character of the field, then split
            # on ", " to get the individual entries.
            for u in mentions[1:-1].split(", "):
                men_file.write(",".join([
                    row["tweetid"],
                    Putin.uncover(row["userid"]),
                    Putin.uncover(u),
                ]) + "\n")
for uid, v in tqdm(user_support.items()): if v[0] > v[1]: users_opinion[uid] = "C" opinion["C"] += 1 elif v[0] < v[1]: users_opinion[uid] = "T" opinion["T"] += 1 else: users_opinion[uid] = "U" opinion["U"] += 1 from fake_identify import Are_you_IRA Putin = Are_you_IRA() def get_tsss(cN, layer="one"): """ 获取IRA和non-IRA的活动时间序列 """ def get_ts(IRA_nodes): dts = [] for i, row in tqdm(IRA_data.iterrows()): u = Putin.uncover(row.userid) if u in IRA_nodes: _dt = row.tweet_time # Move to EST _dt = pendulum.parse(_dt).add(hours=-4).to_datetime_string() dts.append(_dt)