Esempio n. 1
0
    def fill_IRA_info(self):
        """Backfill IRA-related fields of ``self.tweets`` from the IRA CSV dump.

        Reads ``disk/ira_tweets_csv_hashed.csv`` (every column as ``str``) and,
        for each row:

        * if the row's ``tweetid`` is a key of ``self.tweets``, sets that
          entry's ``is_IRA`` to ``1``, fills ``user_id`` when it still holds
          ``-1``, and fills ``dt`` when it still holds
          ``"2000-01-01 00:00:00"``;
        * if the row's ``retweet_tweetid`` is a key of ``self.tweets`` and
          that entry's ``user_id`` is ``-1``, fills it from ``retweet_userid``.

        Afterwards every entry whose ``is_IRA`` is still ``-1`` is set to
        ``1`` or ``0`` depending on the check applied to its ``user_id``.
        The number of entries set to ``1`` is printed at the end.
        """
        putin = Are_you_IRA()
        print("补充IRA数据处理中 ...")

        ira_rows = pd.read_csv(
            "disk/ira_tweets_csv_hashed.csv",
            usecols=[
                "tweetid",
                "userid",
                "tweet_time",
                "retweet_userid",
                "retweet_tweetid",
            ],
            dtype=str,
        )

        marked = 0
        for _, rec in tqdm(ira_rows.iterrows()):
            own_id = rec["tweetid"]
            source_id = rec["retweet_tweetid"]

            if own_id in self.tweets:
                entry = self.tweets[own_id]
                author = putin.uncover(rec["userid"])

                entry["is_IRA"] = 1

                # Only overwrite values that are still at their placeholders.
                if entry["user_id"] == -1:
                    entry["user_id"] = author
                if entry["dt"] == "2000-01-01 00:00:00":
                    # presumably tweet_time lacks a seconds field -- TODO confirm
                    entry["dt"] = rec["tweet_time"] + ":00"

                marked += 1

            # The retweeted tweet may itself be tracked without a known author.
            if source_id in self.tweets and self.tweets[source_id]["user_id"] == -1:
                self.tweets[source_id]["user_id"] = putin.uncover(
                    rec["retweet_userid"])

        for tweet_key in self.tweets.keys():
            entry = self.tweets[tweet_key]
            if entry["is_IRA"] != -1:
                continue
            # NOTE(review): "f**k" looks like a censored method name in this
            # copy of the file; as written it parses as `putin.f ** k(...)`.
            # Left untouched -- restore the real name from the original source.
            if putin.f**k(entry["user_id"]):
                entry["is_IRA"] = 1
                marked += 1
            else:
                entry["is_IRA"] = 0

        print("Count of IRA tweets:", marked)
def get_ira_network_with_big_networks():
    """Filter the four ``disk/all-*-links.txt`` files into ``disk/ira-*-links.txt``.

    For each input file, every line whose second or third
    whitespace-separated field passes the account check is copied verbatim
    to the corresponding output file.
    """
    Putin = Are_you_IRA()

    def search_IRA(in_name, out_name):
        """Copy the matching lines of ``in_name`` into ``out_name``."""
        # The output is opened first (as before) so it is created/truncated
        # even when the input is missing; the input handle is now owned by
        # the ``with`` as well, so it is closed instead of being leaked.
        with open(out_name, "w") as f, open(in_name) as src:
            for line in tqdm(src):
                w = line.strip().split()
                # NOTE(review): "f**k" looks like a censored method name in
                # this copy of the file; as written it parses as
                # `Putin.f ** k(...)`. Left untouched -- restore the real name
                # from the original source.
                if Putin.f**k(w[1]) or Putin.f**k(w[2]):
                    f.write(line)

    search_IRA("disk/all-men-links.txt", "disk/ira-men-links.txt")
    search_IRA("disk/all-ret-links.txt", "disk/ira-ret-links.txt")
    search_IRA("disk/all-rep-links.txt", "disk/ira-rep-links.txt")
    search_IRA("disk/all-quo-links.txt", "disk/ira-quo-links.txt")
def get_network_with_ira():
    """Export reply, quote, retweet and mention edges of the IRA tweets.

    Loads ``data/ira-tweets-ele.csv`` (every column as ``str``) and writes one
    comma-separated file per interaction type under ``disk/``:

    * ``ira-rep.txt`` -- rows with a non-null ``in_reply_to_tweetid``
    * ``ira-quo.txt`` -- rows with a non-null ``quoted_tweet_tweetid``
    * ``ira-ret.txt`` -- rows with a non-null ``retweet_tweetid``
    * ``ira-men.txt`` -- one line per entry of a non-null ``user_mentions``

    User ids are passed through ``Putin.uncover`` before being written.
    Quote rows that fail to format are skipped (best effort); the number of
    quote rows and the number skipped are printed.
    """
    ira_tweets = pd.read_csv("data/ira-tweets-ele.csv", dtype=str)
    print("loaded ", len(ira_tweets))

    Putin = Are_you_IRA()

    # Columns of the CSV, as recorded by the original author:
    #   tweetid, userid, user_display_name, user_screen_name,
    #   user_reported_location, user_profile_description, user_profile_url,
    #   follower_count, following_count, account_creation_date,
    #   account_language, tweet_language, tweet_text, tweet_time,
    #   tweet_client_name, in_reply_to_tweetid, in_reply_to_userid,
    #   quoted_tweet_tweetid, is_retweet, retweet_userid, retweet_tweetid,
    #   latitude, longitude, quote_count, reply_count, like_count,
    #   retweet_count, hashtags, urls, user_mentions, poll_choices

    edge_header = "tweet_id,user_id,o_tweet_id,o_user_id\n"

    def edge_line(row, o_tweet_col, o_user_col):
        """Format one ``tweet,user,other_tweet,other_user`` output line."""
        return ",".join([
            row["tweetid"],
            Putin.uncover(row["userid"]),
            row[o_tweet_col],
            Putin.uncover(row[o_user_col]),
        ]) + "\n"

    # All four outputs are owned by the ``with`` so they are flushed and
    # closed even if one of the loops raises (they were previously never
    # closed).
    with open("disk/ira-men.txt", "w") as men_file, \
            open("disk/ira-ret.txt", "w") as ret_file, \
            open("disk/ira-rep.txt", "w") as rep_file, \
            open("disk/ira-quo.txt", "w") as quo_file:

        rep_ira_tweets = ira_tweets[ira_tweets.in_reply_to_tweetid.notnull()]
        quo_ira_tweets = ira_tweets[ira_tweets.quoted_tweet_tweetid.notnull()]
        ret_ira_tweets = ira_tweets[ira_tweets.retweet_tweetid.notnull()]
        men_ira_tweets = ira_tweets[ira_tweets.user_mentions.notnull()]

        # Replies.
        rep_file.write(edge_header)
        for _, row in rep_ira_tweets.iterrows():
            rep_file.write(
                edge_line(row, "in_reply_to_tweetid", "in_reply_to_userid"))

        # Quotes.
        # NOTE(review): the quoted author is taken from ``retweet_userid``;
        # the column listing above has no quoted-user column -- confirm this
        # is the intended source.
        cnt = 0
        quo_file.write(edge_header)
        for _, row in quo_ira_tweets.iterrows():
            try:
                quo_file.write(
                    edge_line(row, "quoted_tweet_tweetid", "retweet_userid"))
            except Exception:
                # Deliberate best effort: report the offending ids and keep
                # going. ``Exception`` (rather than a bare ``except``) lets
                # KeyboardInterrupt / SystemExit propagate.
                print(row["retweet_userid"])
                print(row["in_reply_to_userid"])
                cnt += 1
        print(len(quo_ira_tweets), cnt)

        # Retweets.
        ret_file.write(edge_header)
        for _, row in ret_ira_tweets.iterrows():
            ret_file.write(edge_line(row, "retweet_tweetid", "retweet_userid"))

        # Mentions.
        # NOTE(review): this header names four columns but each row below has
        # three fields; left unchanged because readers of the file may depend
        # on the current layout.
        men_file.write("tweet_id,user_id,to_tweet_id,to_user_id\n")
        for _, row in men_ira_tweets.iterrows():
            mentions = row["user_mentions"]
            # Drop the first and last character (presumably enclosing
            # brackets -- TODO confirm) and split the remainder on ", ".
            us = mentions[1:-1].split(", ")
            for u in us:
                men_file.write(",".join([
                    row["tweetid"],
                    Putin.uncover(row["userid"]),
                    Putin.uncover(u),
                ]) + "\n")
Esempio n. 4
0
# Label every user by comparing the first two values of its ``user_support``
# entry: "C" when v[0] is larger, "T" when v[1] is larger, "U" when they are
# equal. ``opinion`` keeps a running tally of how many users got each label.
for uid, v in tqdm(user_support.items()):
    label = "C" if v[0] > v[1] else "T" if v[0] < v[1] else "U"
    users_opinion[uid] = label
    opinion[label] += 1


from fake_identify import Are_you_IRA

# Module-level Are_you_IRA instance; get_ts() below calls its uncover() on
# each row's userid.
Putin = Are_you_IRA()

        
def get_tsss(cN, layer="one"):
    """
    获取IRA和non-IRA的活动时间序列
    """
    def get_ts(IRA_nodes):
        dts = []
        for i, row in tqdm(IRA_data.iterrows()):
            u = Putin.uncover(row.userid)
            if u in IRA_nodes:
                _dt = row.tweet_time
                # Move to EST
                _dt = pendulum.parse(_dt).add(hours=-4).to_datetime_string()
                dts.append(_dt)