def draw_5_3_1_figures():
    '''Redraw the 5.3.x figures from the cached 'Data/Figure/5_3_1_2.json'.

    The JSON file must provide the keys 'e_child', 'ne_child', 'e_time' and
    'ne_time'.  Each maps a depth, stored as a string ('1' .. '10'), to a
    list of values.  The series passed to the CDF plot are labelled
    'Echo chamber' (e_*) and 'Non-echo chamber' (ne_*).

    Files written:
      * 'Image/Figure/5_3_1_2.png' - box plot of child counts per depth
      * 'Image/Figure/5_3_1_1.png' - box plot of propagation times per depth
      * 'Image/Figure/5_3_2.png'   - CDF of the depth-1 propagation times
        (written by draw_cdf_plot)

    Raises KeyError if one of the four series, or one of the depths
    '1' .. '10' inside a series, is missing from the file.
    '''
    with open('Data/Figure/5_3_1_2.json', 'r') as f:
        data = json.load(f)

    e_child = data['e_child']
    ne_child = data['ne_child']
    e_time = data['e_time']
    ne_time = data['ne_time']

    # The file keys every series by the depth as a string; re-key the first
    # ten depths by integer before handing them to the box plots.
    depths = range(1, 11)
    e_c = {d: e_child[str(d)] for d in depths}
    ne_c = {d: ne_child[str(d)] for d in depths}
    e_t = {d: e_time[str(d)] for d in depths}
    ne_t = {d: ne_time[str(d)] for d in depths}

    box = BoxPlot(1)
    box.set_multiple_data([e_c, ne_c])
    box.set_ylog()
    box.set_label('Depth', 'Child Count')
    box.save_image('Image/Figure/5_3_1_2.png')

    print(e_t.keys())
    box = BoxPlot(1)
    box.set_multiple_data([e_t, ne_t])
    box.set_ylog()
    box.set_label('Depth', 'Propagation Time')
    # Tick positions are in minutes (1 h = 60, 1 day = 24 * 60).
    box.set_yticks(['0', '1 m', '5 m', '1 h', '1 day'],
                   index=[0, 1, 5, 60, 24 * 60])
    box.save_image('Image/Figure/5_3_1_1.png')

    # Durations of 180 days or more (in minutes) are treated as bad data and
    # dropped before the CDF is drawn.
    filter_value = 60 * 24 * 180
    e_first = sorted(t for t in e_time['1'] if t < filter_value)
    ne_first = sorted(t for t in ne_time['1'] if t < filter_value)

    # Report the largest surviving value of each series.  A series can be
    # empty after filtering, in which case max() would raise ValueError.
    for series in (e_first, ne_first):
        if series:
            print(max(series))

    draw_cdf_plot([e_first, ne_first], 'Propagation Time',
                  ['Echo chamber', 'Non-echo chamber'], '',
                  'Image/Figure/5_3_2.png')
def draw_propagation_velocity():
    '''Draw two propagation-time box plots under 'Image/<folder>/'.

    rumor_propagation_velocity() is called twice: once with
    'Data/echo_chamber2.json' and once with None.  Each call returns four
    values, which are used as follows:

      * 'propagation_time.png' compares the first value of the echo-chamber
        call (tick 'Echo Chamber2') with the second value of the None call
        (tick 'All').
      * 'child_all_time_propagation.png' plots the third and fourth values
        of the echo-chamber call against depth on a log y-axis.

    Relies on the module-level name `folder` for the output directory.
    '''
    (chamber_mean, _,
     chamber_by_depth, non_chamber_by_depth) = rumor_propagation_velocity(
        'Data/echo_chamber2.json')
    # Only the second return value of the unrestricted run is plotted.
    # (The echo_chamber3 / echo_chamber4 variants are currently disabled.)
    _, all_mean, _, _ = rumor_propagation_velocity(None)

    # Figure 1: mean propagation time, echo chamber vs. all.
    mean_plot = BoxPlot(1)
    mean_plot.set_data([chamber_mean, all_mean], '')
    mean_plot.set_xticks(['Echo Chamber2', 'All'])
    mean_plot.set_label('', 'Mean Propagation Time')
    mean_plot.save_image('Image/%s/propagation_time.png' % folder)

    # Figure 2: propagation time per depth, log-scaled.
    depth_plot = BoxPlot(1)
    depth_plot.set_multiple_data([chamber_by_depth, non_chamber_by_depth])
    depth_plot.set_ylog()
    depth_plot.set_label('Depth', 'Propagation Time')
    depth_plot.save_image('Image/%s/child_all_time_propagation.png' % folder)
def time_to_depth_echo_chamber(filename): _, _, time_depth, _, user_depth = get_depth_time_series(None) print(len(time_depth)) #with open('Data/time_series_data.json', 'w') as f: # json.dump({'time_depth' : time_depth, 'user_depth' : user_depth}, f) #with open('Data/time_series_data.json', 'r') as f: # data = json.load(f) #time_depth = data['time_depth'] #user_depth = data['user_depth'] print("time series data load done ") echo_chamber_values = {} non_echo_chamber_values = {} for item in ['time_depth', 'user_depth']: echo_chamber_values[item] = {} non_echo_chamber_values[item] = {} for i in range(1,20): echo_chamber_values[item][i] = [] non_echo_chamber_values[item][i] = [] Bot = bot.load_bot() echo_chamber_cascade_root = {} cascade_veracity = {} echo_chamber_users = e_util.get_echo_chamber_users(filename) files = os.listdir('RetweetNew') #collect echo chamber user participate cascade #for postid in echo_chamber_users.keys(): for postid in files: v = veracity_type(postid).title() #get origin tweet of echo chamber user with open('RetweetNew/%s'%postid, 'r') as f: tweets = json.load(f) for tweet in tweets.values(): try: #if tweet['user'] in echo_chamber_users[postid].keys(): origin = tweet['origin'] otid = tweet['origin_tweet'] #if origin in echo_chamber_users[postid].keys(): if tweet['user'] in echo_chamber_users[postid].keys(): echo_chamber_cascade_root[tweet['origin_tweet']] = 1 except KeyError : pass cascade_veracity[tweet['origin_tweet']] = v print("echo chamber cascade extraction done") echo_chamber_cascades = echo_chamber_cascade_root.keys() print('echo chamber cascades') #print(echo_chamber_cascades) e = {}; n = {}; r = {}; #echo, non echo, ranked echo for item in ['True', 'False', 'Mixed']: e[item] = {} n[item] = {} r[item] = {} for d_type in ['user_depth', 'time_depth']: e[item][d_type] = {} n[item][d_type] = {} r[item][d_type] = {} for i in range(1, 20): e[item][d_type][i] = [] n[item][d_type][i] = [] r[item][d_type][i] = [] for key in time_depth.keys(): v 
= cascade_veracity[key] if v !='True' and v != 'False': v = 'Mixed' if key in echo_chamber_cascades: #for i in range(1, max(time_depth[key].keys())+1): for i in range(1, max(time_depth[key].keys())+1): try: echo_chamber_values['time_depth'][i].append(time_depth[key][i]) echo_chamber_values['user_depth'][i].append(user_depth[key][i]) e[v]['time_depth'][i].append(time_depth[key][i]) e[v]['user_depth'][i].append(user_depth[key][i]) except KeyError: pass else: for i in range(1, max(time_depth[key].keys())+1): try : non_echo_chamber_values['time_depth'][i].append(time_depth[key][i]) non_echo_chamber_values['user_depth'][i].append(user_depth[key][i]) n[v]['time_depth'][i].append(time_depth[key][i]) n[v]['user_depth'][i].append(user_depth[key][i]) except KeyError: pass box = BoxPlot(1) box.set_multiple_data([echo_chamber_values['time_depth'], non_echo_chamber_values['time_depth']]) box.set_ylog() box.set_label('Depth', 'Minutes to Depth') box.save_image('%s/time_depth_echo_chamber_box.png'%foldername) print(echo_chamber_values['time_depth']) #draw time to depth, user to depth of cascade for echo chamber users participated or non echo chamer users participated with open('Data/Figure/5_2_1.json', 'w') as f: json.dump([echo_chamber_values['time_depth'], non_echo_chamber_values['time_depth']], f) draw_time_to_depth_echo_chamber([echo_chamber_values['time_depth'], non_echo_chamber_values['time_depth']], ['echo chamber', 'no echo chamber'], 'median minutes', 'time_depth_echo_chamber_line') draw_time_to_depth_echo_chamber([echo_chamber_values['user_depth'], non_echo_chamber_values['user_depth']], ['echo chamber', 'no echo chamber'], 'median unique users', 'user_depth_echo_chamber_line') with open('Data/Figure/5_2_time.json', 'w') as f: json.dump({'e':echo_chamber_values['time_depth'][1], 'ne':non_echo_chamber_values['time_depth'][1]}, f) #draw cdf with top retweet cdf = CDFPlot() cdf.set_label('Propagation Time', 'CDF') cdf.set_log(True) #cdf.set_ylog() 
cdf.set_data(echo_chamber_values['time_depth'][1], '') cdf.set_data(non_echo_chamber_values['time_depth'][1], '') cdf.save_image('Image/20181105/depth_propagation_time_cdf.png') """
def cascade_depth_distribution():
    '''Box-plot cascade size, unique users and max breadth grouped by the
    maximum depth of each cascade.

    Per-cascade maximum breadth, maximum depth and unique-user counts come
    from e_util.get_cascade_max_breadth(), each indexed by root tweet id.
    Every cascade is counted once, using the 'cascade' field of the first
    tweet seen for it.

    Relies on the module-level names `rumors`, `dirname` and `foldername`.

    Files written (depths 1..17 only, log y-axis):
      * 'Image/<foldername>/depth_cascade_dist.png'
      * 'Image/<foldername>/depth_user_dist.png'
      * 'Image/<foldername>/depth_breadth_dist.png'
    '''
    c_breadth, c_depth, c_unique_users = e_util.get_cascade_max_breadth()

    depth_cascade = {d: [] for d in range(1, 20)}
    depth_unique_users = {d: [] for d in range(1, 20)}
    depth_breadth = {d: [] for d in range(1, 20)}

    seen_cascades = set()
    ccc = -1  # stays -1 when `rumors` is empty, so the print below is safe
    for ccc, postid in enumerate(rumors):
        with open(dirname + '/' + postid, 'r') as f:
            tweets = json.load(f)

        for tweet in tweets.values():
            otid = tweet['origin_tweet']
            if otid in seen_cascades:
                continue

            max_depth = c_depth[otid]
            users = c_unique_users[otid]
            max_breadth = c_breadth[otid]
            seen_cascades.add(otid)

            # setdefault keeps a depth outside the prepared 1..19 buckets
            # from raising KeyError; such depths are not plotted anyway.
            depth_cascade.setdefault(max_depth, []).append(tweet['cascade'])
            depth_unique_users.setdefault(max_depth, []).append(users)
            depth_breadth.setdefault(max_depth, []).append(max_breadth)

    # NOTE(review): the original indentation of this print was lost; it is
    # placed after the loop, where it reports the index of the last file.
    print(ccc)

    plotted_depths = range(1, 18)
    figures = [
        ([depth_cascade[d] for d in plotted_depths],
         'Cascade Size', 'Image/%s/depth_cascade_dist.png'),
        ([depth_unique_users[d] for d in plotted_depths],
         'Number of Users', 'Image/%s/depth_user_dist.png'),
        ([depth_breadth[d] for d in plotted_depths],
         'Breadth Size', 'Image/%s/depth_breadth_dist.png'),
    ]
    for series, ylabel, path_template in figures:
        box = BoxPlot(1)
        box.set_data(series, '')
        box.set_ylog()
        box.set_label('Depth', ylabel)
        box.save_image(path_template % foldername)
def propagation_parent_to_child():
    '''Compare child counts and parent-to-child propagation times of
    echo-chamber users against everyone else, per depth.

    For every rumor file in 'RetweetNew/', tweets are visited in
    chronological order.  Tweets in cascades smaller than 2 and tweets from
    accounts flagged by bot.check_bot() are skipped.  For the rest:

      * the tweet's 'child' count is bucketed by the tweet's depth, under
        e_child when its user is an echo-chamber user of that rumor and
        under ne_child otherwise;
      * for tweets deeper than 1, the delay in minutes to the parent tweet
        is recorded once per parent tweet, from the earliest child that
        passes the filters.  It goes to e_time or ne_time depending on
        whether the tweet's 'parent' field (presumably the parent's user
        id - confirm) is among the echo-chamber users.

    Relies on the module-level name `folder`.

    Files written:
      * 'Image/<folder>/child_num_wo_propagation.png'
      * 'Image/<folder>/child_time_propagation.png'
      * 'Data/Figure/5_3_1.json' with keys 'e_time', 'ne_time', 'e_child',
        'ne_child'

    Only depths 1..19 have buckets; a deeper tweet raises KeyError.
    '''
    bot_data = bot.load_bot()
    dirname = 'RetweetNew/'
    files = os.listdir(dirname)

    # Set filename to None to run with no echo-chamber users at all.
    filename = 'Data/echo_chamber2.json'
    if filename is None:
        echo_chamber_users = {postid: {} for postid in files}
    else:
        echo_chamber_users = e_util.get_echo_chamber_users(filename)

    depths = range(1, 20)
    e_child = {d: [] for d in depths}
    ne_child = {d: [] for d in depths}
    # depth of the parent -> {parent tweet id: minutes to its first child}
    e_time = {d: {} for d in depths}
    ne_time = {d: {} for d in depths}

    print(len(echo_chamber_users.keys()))

    for postid in files:
        with open(dirname + postid, 'r') as f:
            tweets = json.load(f)

        # Parse every timestamp once; the values are reused both for the
        # chronological ordering and for the delay computation.
        posted_at = {tid: parser.parse(tweet['time'])
                     for tid, tweet in tweets.items()}
        sorted_ids = sorted(posted_at, key=posted_at.get)

        e_users = echo_chamber_users[postid]
        print(len(e_users))

        for tid in sorted_ids:
            tweet = tweets[tid]
            userid = tweet['user']

            if tweet['cascade'] < 2:
                continue
            # bot filter
            if bot.check_bot(bot_data, userid) != 0:
                continue

            depth = tweet['depth']
            if userid in e_users:
                e_child[depth].append(tweet['child'])
            else:
                ne_child[depth].append(tweet['child'])

            if depth <= 1:
                continue

            ptid = tweet['parent_tweet']
            parent_depth = tweets[ptid]['depth']
            diff = (posted_at[tid] - posted_at[ptid]).total_seconds() / 60

            # Sanity check: tweets are visited in time order, so a recorded
            # delay should never exceed a later one.
            recorded = e_time[parent_depth].get(ptid)
            if recorded is not None and recorded > diff:
                print(recorded, diff)

            first_child_delay = e_time if tweet['parent'] in e_users else ne_time
            # Keep only the first (earliest) child of each parent.
            if ptid not in first_child_delay[parent_depth]:
                first_child_delay[parent_depth][ptid] = diff

    # Tweets without children are left out of the child-count plot.
    for d in depths:
        e_child[d] = [x for x in e_child[d] if x != 0]
        ne_child[d] = [x for x in ne_child[d] if x != 0]

    box = BoxPlot(1)
    box.set_multiple_data([e_child, ne_child])
    box.set_ylog()
    box.set_label('Depth', 'Child Count')
    box.save_image('Image/%s/child_num_wo_propagation.png' % folder)

    # Materialise the delays as lists: dict views cannot be serialised by
    # json.dump on Python 3.
    for d in depths:
        e_time[d] = list(e_time[d].values())
        ne_time[d] = list(ne_time[d].values())

    box = BoxPlot(1)
    box.set_multiple_data([e_time, ne_time])
    box.set_ylog()
    box.set_label('Depth', 'Propagation Time')
    box.save_image('Image/%s/child_time_propagation.png' % folder)

    with open('Data/Figure/5_3_1.json', 'w') as f:
        json.dump(
            {
                'e_time': e_time,
                'ne_time': ne_time,
                'e_child': e_child,
                'ne_child': ne_child
            }, f)