def draw_5_3_1_figures():
    """Draw the 5.3.1 box plots and the 5.3.2 CDF from a cached JSON file.

    Loads ``Data/Figure/5_3_1_2.json`` and compares the ``e_*`` series with
    the ``ne_*`` series (labelled 'Echo chamber' / 'Non-echo chamber' in the
    CDF) for depths 1 to 10: child count per depth, propagation time per
    depth, and finally a CDF of the depth-1 propagation times.
    """
    with open('Data/Figure/5_3_1_2.json', 'r') as f:
        data = json.load(f)

    def by_depth(series):
        # The JSON file keys each series by depth as a string; re-key by int.
        return dict((depth, series[str(depth)]) for depth in range(1, 11))

    e_c = by_depth(data['e_child'])
    e_t = by_depth(data['e_time'])
    ne_c = by_depth(data['ne_child'])
    ne_t = by_depth(data['ne_time'])
    # Re-keyed like the others but not plotted below.
    ne_t2 = by_depth(data['ne_time2'])

    child_box = BoxPlot(1)
    child_box.set_multiple_data([e_c, ne_c])
    child_box.set_ylog()
    child_box.set_label('Depth', 'Child Count')
    child_box.save_image('Image/Figure/5_3_1_2.png')

    print(e_t.keys())

    time_box = BoxPlot(1)
    time_box.set_multiple_data([e_t, ne_t])
    time_box.set_ylog()
    time_box.set_label('Depth', 'Propagation Time')
    # Tick positions are in minutes (1 h = 60, 1 day = 24 * 60).
    time_box.set_yticks(['0', '1 m', '5 m', '1 h', '1 day'],
                        index=[0, 1, 5, 60, 24 * 60])
    time_box.save_image('Image/Figure/5_3_1_1.png')

    # Durations of 180 days or more (in minutes) are treated as wrong values.
    filter_value = 60 * 24 * 180
    echo_first = [t for t in data['e_time']['1'] if t < filter_value]
    non_echo_first = [t for t in data['ne_time']['1'] if t < filter_value]
    print(max(echo_first))
    print(max(non_echo_first))

    draw_cdf_plot([sorted(echo_first), sorted(non_echo_first)],
                  'Propagation Time',
                  ['Echo chamber', 'Non-echo chamber'],
                  '',
                  'Image/Figure/5_3_2.png')
def bot_participation():
    """Box-plot, per rumor, the ratio of bot to non-bot participants.

    For every file in ``RetweetNew/`` the users of all tweets are passed to
    ``bot.check_bot``; the ratio is (users flagged 1) / (users flagged 0).
    Rumors without any 0-flagged user are skipped because the ratio is
    undefined for them (the original code raised ZeroDivisionError there).
    The ratios are saved as ``Image/bot_ratio_box.png``.
    """
    Bot = bot.load_bot()
    dir_name = "RetweetNew/"

    bot_list = []
    for postid in os.listdir(dir_name):
        with open(dir_name + postid, 'r') as f:
            tweets = json.load(f)

        flags = [bot.check_bot(Bot, tweet['user']) for tweet in tweets.values()]
        bot_count = flags.count(1)
        non_bot_count = flags.count(0)
        if non_bot_count == 0:
            # No denominator: the ratio is undefined for this rumor.
            continue
        # float() keeps this a true division on Python 2 as well.
        bot_list.append(float(bot_count) / non_bot_count)

    box = BoxPlot(1)
    box.set_data(bot_list, '')
    # Every other set_xticks call in this file passes a list of labels.
    box.set_xticks(['bot_ratio'])
    box.save_image('Image/bot_ratio_box.png')
def draw_propagation_velocity():
    """Plot propagation times for echo chamber 2 against all rumors.

    Calls ``rumor_propagation_velocity`` once with the echo-chamber file and
    once with ``None``, then saves two box plots under ``Image/<folder>/``:
    the mean propagation time of both groups, and the per-depth propagation
    time returned by the echo-chamber call.
    """
    echo_result = rumor_propagation_velocity('Data/echo_chamber2.json')
    echo_v2, _, echo_p2, necho_p2 = echo_result
    # Only the second value of the unfiltered (None) run is used.
    _, non_echo, _, _ = rumor_propagation_velocity(None)

    mean_box = BoxPlot(1)
    mean_box.set_data([echo_v2, non_echo], '')
    mean_box.set_xticks(['Echo Chamber2', 'All'])
    mean_box.set_label('', 'Mean Propagation Time')
    mean_box.save_image('Image/%s/propagation_time.png' % folder)

    depth_box = BoxPlot(1)
    depth_box.set_multiple_data([echo_p2, necho_p2])
    depth_box.set_ylog()
    depth_box.set_label('Depth', 'Propagation Time')
    depth_box.save_image('Image/%s/child_all_time_propagation.png' % folder)
def time_to_depth_echo_chamber(filename): _, _, time_depth, _, user_depth = get_depth_time_series(None) print(len(time_depth)) #with open('Data/time_series_data.json', 'w') as f: # json.dump({'time_depth' : time_depth, 'user_depth' : user_depth}, f) #with open('Data/time_series_data.json', 'r') as f: # data = json.load(f) #time_depth = data['time_depth'] #user_depth = data['user_depth'] print("time series data load done ") echo_chamber_values = {} non_echo_chamber_values = {} for item in ['time_depth', 'user_depth']: echo_chamber_values[item] = {} non_echo_chamber_values[item] = {} for i in range(1,20): echo_chamber_values[item][i] = [] non_echo_chamber_values[item][i] = [] Bot = bot.load_bot() echo_chamber_cascade_root = {} cascade_veracity = {} echo_chamber_users = e_util.get_echo_chamber_users(filename) files = os.listdir('RetweetNew') #collect echo chamber user participate cascade #for postid in echo_chamber_users.keys(): for postid in files: v = veracity_type(postid).title() #get origin tweet of echo chamber user with open('RetweetNew/%s'%postid, 'r') as f: tweets = json.load(f) for tweet in tweets.values(): try: #if tweet['user'] in echo_chamber_users[postid].keys(): origin = tweet['origin'] otid = tweet['origin_tweet'] #if origin in echo_chamber_users[postid].keys(): if tweet['user'] in echo_chamber_users[postid].keys(): echo_chamber_cascade_root[tweet['origin_tweet']] = 1 except KeyError : pass cascade_veracity[tweet['origin_tweet']] = v print("echo chamber cascade extraction done") echo_chamber_cascades = echo_chamber_cascade_root.keys() print('echo chamber cascades') #print(echo_chamber_cascades) e = {}; n = {}; r = {}; #echo, non echo, ranked echo for item in ['True', 'False', 'Mixed']: e[item] = {} n[item] = {} r[item] = {} for d_type in ['user_depth', 'time_depth']: e[item][d_type] = {} n[item][d_type] = {} r[item][d_type] = {} for i in range(1, 20): e[item][d_type][i] = [] n[item][d_type][i] = [] r[item][d_type][i] = [] for key in time_depth.keys(): v 
= cascade_veracity[key] if v !='True' and v != 'False': v = 'Mixed' if key in echo_chamber_cascades: #for i in range(1, max(time_depth[key].keys())+1): for i in range(1, max(time_depth[key].keys())+1): try: echo_chamber_values['time_depth'][i].append(time_depth[key][i]) echo_chamber_values['user_depth'][i].append(user_depth[key][i]) e[v]['time_depth'][i].append(time_depth[key][i]) e[v]['user_depth'][i].append(user_depth[key][i]) except KeyError: pass else: for i in range(1, max(time_depth[key].keys())+1): try : non_echo_chamber_values['time_depth'][i].append(time_depth[key][i]) non_echo_chamber_values['user_depth'][i].append(user_depth[key][i]) n[v]['time_depth'][i].append(time_depth[key][i]) n[v]['user_depth'][i].append(user_depth[key][i]) except KeyError: pass box = BoxPlot(1) box.set_multiple_data([echo_chamber_values['time_depth'], non_echo_chamber_values['time_depth']]) box.set_ylog() box.set_label('Depth', 'Minutes to Depth') box.save_image('%s/time_depth_echo_chamber_box.png'%foldername) print(echo_chamber_values['time_depth']) #draw time to depth, user to depth of cascade for echo chamber users participated or non echo chamer users participated with open('Data/Figure/5_2_1.json', 'w') as f: json.dump([echo_chamber_values['time_depth'], non_echo_chamber_values['time_depth']], f) draw_time_to_depth_echo_chamber([echo_chamber_values['time_depth'], non_echo_chamber_values['time_depth']], ['echo chamber', 'no echo chamber'], 'median minutes', 'time_depth_echo_chamber_line') draw_time_to_depth_echo_chamber([echo_chamber_values['user_depth'], non_echo_chamber_values['user_depth']], ['echo chamber', 'no echo chamber'], 'median unique users', 'user_depth_echo_chamber_line') with open('Data/Figure/5_2_time.json', 'w') as f: json.dump({'e':echo_chamber_values['time_depth'][1], 'ne':non_echo_chamber_values['time_depth'][1]}, f) #draw cdf with top retweet cdf = CDFPlot() cdf.set_label('Propagation Time', 'CDF') cdf.set_log(True) #cdf.set_ylog() 
cdf.set_data(echo_chamber_values['time_depth'][1], '') cdf.set_data(non_echo_chamber_values['time_depth'][1], '') cdf.save_image('Image/20181105/depth_propagation_time_cdf.png') """
def cascade_depth_distribution():
    """Box-plot cascade size, unique users and max breadth by cascade depth.

    Every cascade (identified by its ``origin_tweet``) is counted once, under
    the maximum depth reported by ``e_util.get_cascade_max_breadth``.  Bins
    exist for depths 1 to 19; the plots show depths 1 to 17.  Three images
    are written to ``Image/<foldername>/``.

    Raises KeyError if a cascade's maximum depth falls outside 1..19.
    """
    c_breadth, c_depth, c_unique_users = e_util.get_cascade_max_breadth()

    depth_cascade = {}
    depth_unique_users = {}
    depth_breadth = {}
    for depth in range(1, 20):
        depth_cascade[depth] = []
        depth_unique_users[depth] = []
        depth_breadth[depth] = []

    seen_cascades = set()
    for ccc, postid in enumerate(rumors):
        with open(dirname + '/' + postid, 'r') as f:
            tweets = json.load(f)

        for tweet in tweets.values():
            otid = tweet['origin_tweet']
            if otid in seen_cascades:
                continue
            seen_cascades.add(otid)

            max_depth = c_depth[otid]
            # The cascade size is read from the first tweet seen for it.
            depth_cascade[max_depth].append(tweet['cascade'])
            depth_unique_users[max_depth].append(c_unique_users[otid])
            depth_breadth[max_depth].append(c_breadth[otid])
        # Progress output: index of the rumor file just processed.
        print(ccc)

    plotted_depths = range(1, 18)
    cascade_sizes = [depth_cascade[d] for d in plotted_depths]
    user_counts = [depth_unique_users[d] for d in plotted_depths]
    breadths = [depth_breadth[d] for d in plotted_depths]

    box = BoxPlot(1)
    box.set_data(cascade_sizes, '')
    box.set_ylog()
    box.set_label('Depth', 'Cascade Size')
    box.save_image('Image/%s/depth_cascade_dist.png' % foldername)

    box = BoxPlot(1)
    box.set_data(user_counts, '')
    box.set_ylog()
    box.set_label('Depth', 'Number of Users')
    box.save_image('Image/%s/depth_user_dist.png' % foldername)

    box = BoxPlot(1)
    box.set_data(breadths, '')
    box.set_ylog()
    box.set_label('Depth', 'Breadth Size')
    box.save_image('Image/%s/depth_breadth_dist.png' % foldername)
def _append_timeline_diversity(users, timeline_dir, tweet_out, source_out):
    """Append the diversity scores of each user's timeline to the out lists.

    Users whose timeline file cannot be opened are skipped.  Returns the
    number of skipped users.
    """
    missing = 0
    for user in users:
        try:
            with open(timeline_dir + user, 'r') as f:
                user_tweets = json.load(f)
        except IOError:
            missing += 1
            continue

        tweet_diversity, source_diversity = get_diversity(user_tweets)
        if tweet_diversity is not None:
            tweet_out.append(tweet_diversity)
        if source_diversity is not None:
            source_out.append(source_diversity)
    return missing


def echo_chamber_diversity(filename):
    """Compare timeline diversity of echo chamber and other users.

    For every rumor file in ``Retweet/`` the participants are split into
    echo chamber users (from ``filename``) and everyone else; each user's
    timeline under ``../Timeline/`` is scored with ``get_diversity``.  The
    two score distributions are saved as CDF and box plots under
    ``Image/20181002/``.

    filename: echo chamber file for ``e_util.get_echo_chamber_users``, or
        ``None`` to treat every participant as a non echo chamber user.
    """
    # NOTE(review): the loaded bot data is not used in this function; the
    # call is kept because its side effects are not visible from here.
    bot.load_bot()
    dirname = 'Retweet/'
    timeline_dir = '../Timeline/'
    files = os.listdir(dirname)

    if filename is None:
        echo_chamber_users = {}
    else:
        echo_chamber_users = e_util.get_echo_chamber_users(filename)

    echo_tweet_diversity = []
    echo_source_diversity = []
    necho_tweet_diversity = []
    necho_source_diversity = []

    for postid in files:
        with open(dirname + postid) as f:
            tweets = json.load(f)

        # Rumors without an echo chamber entry simply have no echo users.
        echo_users = echo_chamber_users.get(postid, {})
        non_echo_users = dict.fromkeys(
            tweet['user'] for tweet in tweets.values()
            if tweet['user'] not in echo_users)
        print(len(echo_users), len(non_echo_users))

        _append_timeline_diversity(echo_users, timeline_dir,
                                   echo_tweet_diversity,
                                   echo_source_diversity)
        _append_timeline_diversity(non_echo_users, timeline_dir,
                                   necho_tweet_diversity,
                                   necho_source_diversity)

    # CDF
    cdf = CDFPlot()
    cdf.set_label('Retweet Origin Diversity', 'CDF')
    cdf.set_data(echo_tweet_diversity, 'Echo Chamber')
    cdf.set_data(necho_tweet_diversity, 'Non Echo Chamber')
    cdf.set_legends(['Echo Chamber', 'Non Echo Chamber'], 'User Type')
    cdf.save_image('Image/20181002/source_diversity_retweet_cdf.png')

    cdf = CDFPlot()
    cdf.set_label('Source News Diversity', 'CDF')
    cdf.set_data(echo_source_diversity, 'Echo Chamber')
    cdf.set_data(necho_source_diversity, 'Non Echo Chamber')
    cdf.set_legends(['Echo Chamber', 'Non Echo Chamber'], 'User Type')
    cdf.save_image('Image/20181002/source_diversity_news_cdf.png')

    # BoxPlot: two data series, hence exactly two tick labels.
    box = BoxPlot(1)
    box.set_data([echo_tweet_diversity, necho_tweet_diversity], '')
    box.set_xticks(['Echo Chamber', 'Non Echo Chamber'])
    box.set_label('', 'Retweet Origin Diversity')
    box.save_image('Image/20181002/source_diversity_retweet.png')

    box = BoxPlot(1)
    box.set_data([echo_source_diversity, necho_source_diversity], '')
    box.set_xticks(['Echo Chamber', 'Non Echo Chamber'])
    box.set_label('', 'Source News Diversity')
    box.save_image('Image/20181002/source_diversity_news.png')
# Share-count box plots, one subplot per category.
# category_list and the sharecount_* helpers are defined outside this fragment.
all_sharecount_list = []
for item in category_list:
    # share-count list of one category
    all_sharecount_list.append(sharecount_by_category(item))
# Smallest integer whose square is >= x (i.e. the ceiling of sqrt(x)).
subplot_num = lambda x: int(math.sqrt(x)) if math.sqrt(x).is_integer(
) else int(math.sqrt(x)) + 1
# Display BoxPlot and save at the path.
# NOTE(review): 25 is hard-coded; assumes category_list has at least 25
# entries - TODO confirm.
BoxPlt = BoxPlot(subplot_num(25))
for i in range(25):
    BoxPlt.set_data(all_sharecount_list[i], '')
    BoxPlt.set_title(category_list[i])
    BoxPlt.set_ylim(1000)
BoxPlt.save_image('./image/sharecount_box_plot.png')
all_sharecount_list = []
# Same categories again, split by source presence: each entry is a pair
# [with source, without source].
for item in category_list:
    all_sharecount_list.append([
        sharecount_by_category_with_source(item),
        sharecount_by_category_without_source(item)
    ])
# Display BoxPlot and save at the path.
# NOTE(review): no save_image call for this second plot is visible in this
# fragment.
BoxPlt = BoxPlot(subplot_num(25))
for i in range(25):
    BoxPlt.set_data(all_sharecount_list[i], '')
    BoxPlt.set_title(category_list[i])
    # BoxPlt.set_ylim(2000)
def draw_propagation_time_to_group():
    """Box-plot the propagation-time-to-group results for chambers 2, 3, 4.

    For each of ``Data/echo_chamber{2,3,4}.json`` the four series returned
    by ``propagation_time_to_group`` are drawn as two echo / non-echo box
    plots.  A final call with ``None`` supplies the 'All' baseline for two
    combined plots.  All images are saved under ``Image/<folder>/``.
    """

    def save_box(series, ticks, path_template):
        # path_template still contains the '%s' slot for the output folder.
        plot = BoxPlot(1)
        plot.set_data(series, '')
        plot.set_xticks(ticks)
        plot.set_label('', 'Propagation Time')
        plot.save_image(path_template % folder)

    echo = {}     # chamber size -> first returned series
    r_echo = {}   # chamber size -> third returned series
    for size in (2, 3, 4):
        print('echo chamber %d' % size)
        echo[size], necho, r_echo[size], r_necho = propagation_time_to_group(
            'Data/echo_chamber%d.json' % size)

        save_box([echo[size], necho],
                 ['Echo Chamber', 'Non Echo Chamber'],
                 'Image/%%s/propagation_time_to_group%d.png' % size)
        save_box([r_echo[size], r_necho],
                 ['Echo Chamber', 'Non Echo Chamber'],
                 'Image/%%s/propagation_time_to_group_r%d.png' % size)

    _, non_echo, _, rnon_echo = propagation_time_to_group(None)
    print(len(echo[2]), len(echo[3]), len(echo[4]), len(non_echo))

    save_box([echo[2], echo[3], echo[4], non_echo],
             ['Echo Chamber2', 'Echo Chamber3', 'Echo Chamber4', 'All'],
             'Image/%s/propagation_time_to_group.png')
    save_box([r_echo[2], r_echo[3], r_echo[4], rnon_echo],
             ['Echo Chamber2', 'Echo Chamber3', 'Echo Chamber4', 'All'],
             'Image/%s/propagation_time_to_group_r.png')
def propagation_parent_to_child(filename='Data/echo_chamber2.json'):
    """Compare child counts and parent-to-first-child delays by depth.

    Walks every rumor file in ``RetweetNew/`` in time order.  Tweets of
    cascades smaller than 2 and tweets whose user does not pass the bot
    filter (``bot.check_bot(...) != 0``) are skipped.  For the rest:

    * the tweet's child count is binned by its depth, split by whether the
      tweet's user is an echo chamber user;
    * for tweets deeper than 1, the delay in minutes to the parent tweet is
      recorded once per parent (first child seen in time order), binned by
      the parent's depth and split by whether ``parent`` is an echo chamber
      user.

    Two box plots are saved under ``Image/<folder>/`` and the binned data is
    dumped to ``Data/Figure/5_3_1.json``.

    filename: echo chamber file for ``e_util.get_echo_chamber_users``, or
        ``None`` to treat every user as a non echo chamber user.

    Raises KeyError if a depth outside 1..19 is encountered.
    """
    Bot = bot.load_bot()
    dirname = 'RetweetNew/'
    files = os.listdir(dirname)

    if filename is None:
        echo_chamber_users = {}
    else:
        echo_chamber_users = e_util.get_echo_chamber_users(filename)

    depths = range(1, 20)
    e_child = dict((d, []) for d in depths)
    ne_child = dict((d, []) for d in depths)
    e_time = dict((d, {}) for d in depths)    # depth -> {parent id: minutes}
    ne_time = dict((d, {}) for d in depths)

    print(len(echo_chamber_users.keys()))
    for postid in files:
        with open(dirname + postid, 'r') as f:
            tweets = json.load(f)

        # Parse every timestamp once; reused for ordering and for delays.
        posted_at = dict((tid, parser.parse(tweet['time']))
                         for tid, tweet in tweets.items())
        sorted_ids = sorted(posted_at, key=posted_at.get)

        # Rumors without an echo chamber entry have no echo chamber users.
        e_users = echo_chamber_users.get(postid, {})
        print(len(e_users))

        for tid in sorted_ids:
            tweet = tweets[tid]
            if tweet['cascade'] < 2:
                continue
            # bot filter
            if bot.check_bot(Bot, tweet['user']) != 0:
                continue

            depth = tweet['depth']
            if tweet['user'] in e_users:
                e_child[depth].append(tweet['child'])
            else:
                ne_child[depth].append(tweet['child'])

            if depth <= 1:
                continue

            ptid = tweet['parent_tweet']
            parent_depth = tweets[ptid]['depth']
            diff = (posted_at[tid] - posted_at[ptid]).total_seconds() / 60

            # Diagnostic: a later child that is closer to its parent than
            # the delay already recorded for that parent.
            recorded = e_time[parent_depth].get(ptid)
            if recorded is not None and recorded > diff:
                print(recorded, diff)

            # Keep only the first delay recorded for each parent.
            target = e_time if tweet['parent'] in e_users else ne_time
            target[parent_depth].setdefault(ptid, diff)

    # remove child 0 count
    for d in depths:
        e_child[d] = [x for x in e_child[d] if x != 0]
        ne_child[d] = [x for x in ne_child[d] if x != 0]

    box = BoxPlot(1)
    box.set_multiple_data([e_child, ne_child])
    box.set_ylog()
    box.set_label('Depth', 'Child Count')
    box.save_image('Image/%s/child_num_wo_propagation.png' % folder)

    # Plain lists: dict views are not JSON serialisable on Python 3.
    for d in depths:
        e_time[d] = list(e_time[d].values())
        ne_time[d] = list(ne_time[d].values())

    box = BoxPlot(1)
    box.set_multiple_data([e_time, ne_time])
    box.set_ylog()
    box.set_label('Depth', 'Propagation Time')
    box.save_image('Image/%s/child_time_propagation.png' % folder)

    with open('Data/Figure/5_3_1.json', 'w') as f:
        json.dump(
            {
                'e_time': e_time,
                'ne_time': ne_time,
                'e_child': e_child,
                'ne_child': ne_child
            }, f)
def _known_polarity_scores(user_ids):
    """Return the polarity scores of the users, dropping the -999 sentinel."""
    scores = (get_polarity(user_id) for user_id in user_ids)
    return [score for score in scores if score != -999]


def diversity(filename):
    """Compare polarity diversity of echo chambers with random user samples.

    Each echo chamber in ``filename`` (key: rumor ids joined by ``_``,
    value: its users) with at least two users gets ``util.eta`` of its
    users' polarity scores.  For every rumor seen, the same number of users
    is then drawn with ``get_random_user`` from that rumor's tweet file and
    scored the same way.  Results go to
    ``Data/echo_chamber_diversity.json`` and a box plot
    ``Image/diversity_box_<suffix>.png``.

    filename: echo chamber JSON file; the text after ``echo_chamber`` in
        the name (without ``.json``) becomes the image suffix.
    """
    index = filename.replace(".json", "").split('echo_chamber')
    print(index)

    with open(filename) as f:
        echo_chambers = json.load(f)
    print('total ', len(echo_chambers))

    sample_size = {}   # rumor id -> number of scored echo chamber users
    echo_diversity = {}
    for count, key in enumerate(echo_chambers, 1):
        users = echo_chambers[key]
        if count % 100 == 0:
            print(count)
        if len(users) < 2:
            continue

        polars = _known_polarity_scores(users)
        user_count = len(polars)

        # Record the count for every rumor id in the key.  The original
        # set only the first two, which left a placeholder 1 for longer keys
        # (stopping the loop at once) and failed on single-id keys.
        for rumor_id in key.split('_'):
            sample_size[rumor_id] = user_count

        # NOTE(review): processing stops entirely at the first chamber with
        # exactly one scored user (same as the original
        # "1 in postid.values()" check) - confirm "continue" was not meant.
        if user_count == 1:
            break

        echo_diversity[key] = util.eta(polars)

    random_diversity = {}
    for rumor_id, user_num in sample_size.items():
        with open(dir_name + rumor_id, 'r') as f:
            tweets = json.load(f)

        users = [tweet['user'] for tweet in tweets.values()]
        sampled = get_random_user(users, user_num)
        random_diversity[rumor_id] = util.eta(_known_polarity_scores(sampled))

    with open('Data/echo_chamber_diversity.json', 'w') as f:
        json.dump({'echo_chamber': echo_diversity,
                   'random': random_diversity}, f)

    box = BoxPlot(1)
    # Plain lists rather than dict views, which are not sequences on Python 3.
    box.set_data([list(random_diversity.values()),
                  list(echo_diversity.values())], '')
    box.set_xticks(['Random', 'Echo chamber'])
    box.save_image('Image/diversity_box_%s.png' % index[1])
def polarity_diversity():
    """Report polarity diversity per rumor, cascade and echo chamber.

    Steps, each printing ``describe()`` statistics:

    1. ``util.eta`` of user polarity scores per rumor and per cascade
       (cascades with fewer than two users are skipped), saved as a box plot
       under ``Image/<foldername>/``.
    2. ``util.eta`` per echo chamber of ``Data/echo_chamber2.json`` with at
       least two users.
    3. Diversity and size of cascades that contain at least one echo chamber
       user versus all other cascades, saved as two box plots under
       ``Image/20180927/``.

    NOTE(review): scores are passed to ``util.eta`` unfiltered here, while
    ``diversity()`` in this file drops scores equal to -999 - confirm which
    behaviour is intended.
    """
    files = os.listdir(dir_name)

    users_polarity = {}           # rumor id -> {user: score}
    users_polarity_cascade = {}   # root tweet id -> {user: score}
    retweet_cache = {}
    for postid in files:
        with open(dir_name + '%s' % postid, 'r') as f:
            tweets = json.load(f)
        retweet_cache[postid] = tweets

        post_scores = users_polarity[postid] = {}
        for tweet in tweets.values():
            p_score = get_polarity(tweet['user'])
            post_scores[tweet['user']] = p_score
            cascade_scores = users_polarity_cascade.setdefault(
                tweet['origin_tweet'], {})
            cascade_scores[tweet['user']] = p_score

    # check rumor polarity similarity
    r_diversity = [util.eta(list(scores.values()))
                   for scores in users_polarity.values()]
    print(pd.Series(r_diversity).describe())

    # check cascade polarity similarity
    c_diversity = [util.eta(list(scores.values()))
                   for scores in users_polarity_cascade.values()
                   if len(scores) >= 2]
    print(pd.Series(c_diversity).describe())

    box = BoxPlot(1)
    box.set_data([r_diversity, c_diversity], '')
    box.set_xticks(['Rumor', 'Cascade'])
    box.save_image('Image/%s/diversity_box.png' % foldername)

    # check echo chamber users' polarity similarity
    e_diversity = []
    echo_chamber_users = {}       # rumor id -> {user: 1}
    with open('Data/echo_chamber2.json') as f:
        echo_chamber = json.load(f)

    for key in echo_chamber:
        users = echo_chamber[key]
        if len(users) < 2:
            continue
        e_diversity.append(util.eta([get_polarity(user) for user in users]))

        # collect all echo chamber users of every rumor named in the key
        for postid in key.split('_'):
            members = echo_chamber_users.setdefault(postid, {})
            for user in users:
                members[user] = 1
    print(pd.Series(e_diversity).describe())

    # Cascades in which at least one echo chamber user took part.
    echo_cascade = set()
    for postid in files:
        # Rumors outside every echo chamber have no echo chamber users.
        members = echo_chamber_users.get(postid, {})
        for tweet in retweet_cache[postid].values():
            if tweet['user'] in members:
                echo_cascade.add(tweet['origin_tweet'])

    echo_cascade_diversity = []
    echo_cascade_size = []
    non_echo_cascade_diversity = []
    non_echo_cascade_size = []
    # users_polarity_cascade already holds every cascade's user scores, so
    # they are reused instead of querying get_polarity a second time.
    for cascade, scores in users_polarity_cascade.items():
        cascade_diversity = util.eta(list(scores.values()))
        if cascade in echo_cascade:
            echo_cascade_diversity.append(cascade_diversity)
            echo_cascade_size.append(len(scores))
        else:
            non_echo_cascade_diversity.append(cascade_diversity)
            non_echo_cascade_size.append(len(scores))

    print('echo chamber cascade')
    print(pd.Series(echo_cascade_diversity).describe())
    print(pd.Series(echo_cascade_size).describe())
    print('non echo chamber cascade')
    print(pd.Series(non_echo_cascade_diversity).describe())
    print(pd.Series(non_echo_cascade_size).describe())

    box = BoxPlot(1)
    box.set_data([echo_cascade_diversity, non_echo_cascade_diversity], '')
    box.set_xticks(['Echo Chamber', 'Non Echo Chamber'])
    box.save_image('Image/20180927/diversity_echo_cascade_box.png')

    box = BoxPlot(1)
    box.set_data([echo_cascade_size, non_echo_cascade_size], '')
    box.set_xticks(['Echo Chamber', 'Non Echo Chamber'])
    box.save_image('Image/20180927/diversity_echo_cascade_size_box.png')