Esempio n. 1
0
def breadth_change():
    """Plot breadth-over-users line charts for echo chamber cascades.

    Reads echo chamber membership from Data/echo_chamber2.json and, for the
    first echo chamber only, saves one line plot per cascade showing breadth
    versus user count, with a vertical marker at the user index.
    """
    with open('Data/echo_chamber2.json', 'r') as f:
        echo_chambers = json.load(f)

    processed = 0
    for chamber_key, users in echo_chambers.items():
        breadth_series, user_index = get_breadth_time_series(chamber_key, users)

        for cascade_id, breadth in breadth_series.items():
            marker = user_index[cascade_id]
            plot = LinePlot()
            plot.set_label('Users', 'Breadth')
            plot.set_plot_data(breadth, np.arange(1, len(breadth) + 1, 1))
            plot.set_axvline(marker)
            plot.save_image('Image/Breadth/breadth_change_line_%s_%s.png' %
                            (chamber_key, cascade_id))

        processed += 1
        if processed > 0:  # only the first echo chamber is plotted
            break
Esempio n. 2
0
def time_to_depth():
    """Plot median time-to-depth and mean unique-users-to-depth curves.

    Pulls depth time series for three veracity groups (True, False, and
    Mixture/Mostly True/Mostly False), aggregates the per-depth values over
    all cascades for depths 1..19, and saves two log-scale line plots into
    the module-level ``foldername`` directory.
    """
    #index = filename.replace(".json", "").split('echo_chamber')
    #print(index)
    # Each call returns (ignored, ignored, {cascade: {depth: time}},
    # user ids, {cascade: {depth: unique user count}}) for one veracity group.
    _, _, time_depth_cascade, user_ids, cascade_depth_users = get_depth_time_series('True')    
    _, _, time_depth_cascade2, user_ids2, cascade_depth_users2 = get_depth_time_series('False')    
    _, _, time_depth_cascade3, user_ids3, cascade_depth_users3 = get_depth_time_series('Mixture,Mostly True,Mostly False')    


    t = {}; e = {}; u_all = {}
    #true, false, mixture time to depth
    t_td = {}; f_td = {}; m_td = {};
    #true, false, mixture user to depth
    t_ud = {}; f_ud = {}; m_ud = {};

    # Pre-create empty buckets for depths 1..19 so appends below never miss.
    for i in range(1,20):
        t[i] = [] 
        e[i] = []
        u_all[i] = []
        t_td[i] = []
        f_td[i] = []
        m_td[i] = []
        t_ud[i] = []
        f_ud[i] = []
        m_ud[i] = []

    # NOTE(review): range(1, max(keys)) excludes each cascade's deepest
    # level -- confirm whether the maximum depth should be included
    # (possible off-by-one shared by all three loops below).
    for key in time_depth_cascade.keys():
        for i in range(1, max(time_depth_cascade[key].keys())):
            t[i].append(time_depth_cascade[key][i]) # 1 ~ max_depth 
            t_td[i].append(time_depth_cascade[key][i]) # 1 ~ max_depth 
            try:
                u_all[i].append(cascade_depth_users[key][i])
                t_ud[i].append(cascade_depth_users[key][i])
            except KeyError :
                # some depths have no unique-user entry; skip them
                pass 
            
    for key in time_depth_cascade2.keys():
        for i in range(1, max(time_depth_cascade2[key].keys())):
            f_td[i].append(time_depth_cascade2[key][i]) # 1 ~ max_depth 
            try:
                f_ud[i].append(cascade_depth_users2[key][i])
            except KeyError :
                pass 
    for key in time_depth_cascade3.keys():
        for i in range(1, max(time_depth_cascade3[key].keys())):
            m_td[i].append(time_depth_cascade3[key][i]) # 1 ~ max_depth 
            try:
                m_ud[i].append(cascade_depth_users3[key][i])
            except KeyError :
                pass 

    x_ticks = np.arange(1,20)
    """
    depth_list = []
    veracity_list = []
    time_list = []
    for depth in x_ticks:
        for value in t_td[depth]:
            depth_list.append(depth)
            time_list.append(value)
            veracity_list.append('True')
        
        for value in f_td[depth]:
            depth_list.append(depth)
            time_list.append(value)
            veracity_list.append('False')

        for value in m_td[depth]:
            depth_list.append(depth)
            time_list.append(value)
            veracity_list.append('Mixed')

    df = pd.DataFrame({'time':time_list, 'depth':depth_list, 'type':veracity_list}) 
    line = LinePlot()
    line.set_sns_plot(df)
    """
    # Median time to reach each depth, per veracity group.
    y_ticks1 = [np.median(t_td[depth]) for depth in x_ticks]
    y_ticks2 = [np.median(f_td[depth]) for depth in x_ticks]
    y_ticks3 = [np.median(m_td[depth]) for depth in x_ticks]

    print(y_ticks1)
    print(y_ticks2)
    print(y_ticks3)
   
    line = LinePlot()
    line.set_ylog()
    line.set_label('Depth', 'Median Minutes')
    line.set_plot_data(y_ticks1, x_ticks)
    line.set_plot_data(y_ticks2, x_ticks)
    line.set_plot_data(y_ticks3, x_ticks)
    line.set_legends(['True', 'False', 'Mixed'])
    line.set_xticks(x_ticks)
    line.save_image('%s/time_depth_line_echo_chamber.png'%(foldername))
    
    #number of users to depth 
    # Mean unique users that reached each depth, per veracity group.
    u_ticks1 = [np.mean(t_ud[depth]) for depth in x_ticks]
    u_ticks2 = [np.mean(f_ud[depth]) for depth in x_ticks]
    u_ticks3 = [np.mean(m_ud[depth]) for depth in x_ticks]
    print(u_ticks1)

    line = LinePlot()
    line.set_ylog()
    line.set_label('Depth', 'Mean Unique Users')
    line.set_plot_data(u_ticks1, x_ticks)
    line.set_plot_data(u_ticks2, x_ticks)
    line.set_plot_data(u_ticks3, x_ticks)
    line.set_legends(['True', 'False', 'Mixed'])
    line.set_xticks(x_ticks)
    line.save_image('%s/user_depth_line_echo_chamber.png'%(foldername))
Esempio n. 3
0
def user_to_depth(user_depth, user_depth2, user_depth3):
    """Plot mean unique users reached at each depth for three veracity groups.

    Args:
        user_depth:  mapping depth -> {cascade: unique user count} ('True').
        user_depth2: same mapping for 'False' rumors.
        user_depth3: same mapping for mixed-veracity rumors.

    Saves a log-y line plot to Image/user_depth_line.png.
    """
    print(user_depth)
    x_ticks1 = user_depth.keys()
    x_ticks2 = user_depth2.keys()
    x_ticks3 = user_depth3.keys()
    # list(...) so np.mean also works on Python 3 dict views.
    y_ticks1 = [np.mean(list(user_depth[depth].values())) for depth in x_ticks1]
    y_ticks2 = [np.mean(list(user_depth2[depth].values())) for depth in x_ticks2]
    y_ticks3 = [np.mean(list(user_depth3[depth].values())) for depth in x_ticks3]
    # Pick the longest depth axis for the x ticks.
    # BUG FIX: the first branch previously compared len(x_ticks1) against
    # len(x_ticks2) twice, never against len(x_ticks3).
    if len(x_ticks1) > len(x_ticks2) and len(x_ticks1) > len(x_ticks3):
        x_ticks = x_ticks1
    elif len(x_ticks2) > len(x_ticks1) and len(x_ticks2) > len(x_ticks3):
        x_ticks = x_ticks2
    else:
        x_ticks = x_ticks3
    line = LinePlot()
    line.set_ylog()
    line.set_label('Depth', 'Mean Unique Users')
    line.set_plot_data(y_ticks1, x_ticks)
    line.set_plot_data(y_ticks2, x_ticks)
    line.set_plot_data(y_ticks3, x_ticks)
    line.set_legends(['True', 'False', 'Mixed'])
    line.set_xticks(x_ticks)
    line.save_image('Image/user_depth_line.png')
Esempio n. 4
0
def draw_time_to_depth_echo_chamber(data, legend, data_type, filename):
    """Draw one median-per-depth line (depths 1..19) for each series in *data*.

    Args:
        data: iterable of dicts mapping depth -> list of values.
        legend: legend labels, one per series.
        data_type: y-axis label.
        filename: output image name (saved under the ``foldername`` global).
    """
    depths = np.arange(1, 20)
    plot = LinePlot()
    plot.set_ylog()
    plot.set_label('Depth', data_type)
    for series in data:
        medians = [np.median(series[depth]) for depth in depths]
        plot.set_plot_data(medians, depths)
    plot.set_legends(legend)
    plot.set_xticks(depths)
    plot.save_image('%s/%s.png' % (foldername, filename))
Esempio n. 5
0
def cascade_change():
    """Plot per-cascade user-growth curves for the first 11 echo chambers.

    Loads echo chamber membership from Data/echo_chamber2.json and writes one
    log-log line plot per cascade under Image/Cascade/, with a vertical
    marker at the user index.
    """
    with open('Data/echo_chamber2.json', 'r') as f:
        echo_chambers = json.load(f)

    processed = 0
    for chamber_key, users in echo_chambers.items():
        cascade_series, user_index = get_cascade_time_series(chamber_key, users)

        for cascade_id, cascade in cascade_series.items():
            marker = user_index[cascade_id]
            plot = LinePlot()
            plot.set_ylog()
            plot.set_label('Cascade', 'User')
            plot.set_xlog()
            plot.set_axvline(marker)
            plot.set_plot_data(cascade, np.arange(1, len(cascade) + 1, 1))
            plot.save_image('Image/Cascade/cascade_change_line_%s_%s.png' %
                            (chamber_key, cascade_id))

        processed += 1
        if processed > 10:  # stop after the first 11 echo chambers
            break
Esempio n. 6
0
        for item in c:
            if item not in category_all:
                category_all.append(item)

    category_count = []
    for c in category_all:
        count_list = []
        for i in range(2011, 2018):
            count = category_count_by_year(i, c)
            count_list.append(count)
        category_count.append(count_list)
    #print(category_count)
    df = pd.DataFrame(category_count, index = category_all, columns = range(2011, 2018))
    #print(df)
    
    LinePlt = LinePlot()
    LinePlt.set_label('year', 'number of articles')
    LinePlt.set_plot_data(category_count, 'category count')
    LinePlt.set_xticks(range(2011, 2018))
    LinePlt.set_legends(category_all)
    LinePlt.save_image('./image/category_count_year.png')

    print("top key words by category")
    writer = pd.ExcelWriter('./trending_words/trending_keywords_category.xlsx', engine='xlsxwriter')
    dataframe_list = []
    category_data = []
    for item in categories:    
        words = frequency(titles_category(item))
        category_data.append(tuple_to_string(words))
    df = pd.DataFrame(category_data, index = categories, columns = range(1,11))
    dataframe_list.append(df)
Esempio n. 7
0
def draw_graph():
    """Draw time-vs-depth, time-vs-users, and depth-CDF plots for True/False rumors."""
    # (ignored returns are depth->user and cascade->depth structures not used here)
    depth_time1, depth_user1, unique_user_time1, cascade_depth1 = time_series('True')

    x_ticks1 = depth_time1.keys()
    y_ticks1 = [np.mean(depth_time1[depth].values()) for depth in x_ticks1]

    depth_time2, depth_user2, unique_user_time2, cascade_depth2 = time_series('False')
    
    x_ticks2 = depth_time2.keys()
    # NOTE(review): iterates x_ticks1 (True-rumor depths) over depth_time2 --
    # likely meant x_ticks2; as written it can KeyError when the key sets
    # differ. Confirm against the symmetric unique_user loops below before
    # changing, since set_plot_data pairs both series with x_ticks1.
    y_ticks2 = [np.mean(depth_time2[depth].values()) for depth in x_ticks1]

    #draw mean minutes - depth line plot 
    line = LinePlot()
    line.set_ylog()
    line.set_label('Depth', 'Mean Minutes')
    line.set_plot_data([y_ticks1, y_ticks2], x_ticks1)
    line.set_legends(['True', 'False'])
    line.save_image('Image/time_depth_line.png')

    x_ticks1 = unique_user_time1.keys()
    x_ticks2 = unique_user_time2.keys()
    x_ticks1 = sorted(x_ticks1)
    y_ticks1 = [np.mean(unique_user_time1[num].values()) for num in x_ticks1]
    y_ticks2 = [np.mean(unique_user_time2[num].values()) for num in x_ticks2]
    
    #draw mean minutes - unique users line plot 
    line = LinePlot()
    line.set_ylog()
    line.set_label('Unique Users', 'Mean Minutes')
    line.set_plot_data([y_ticks1, y_ticks2], x_ticks1)
    line.set_xticks(x_ticks1)
    line.set_legends(['True', 'False'])
    line.save_image('Image/time_users_line.png')

    # Flatten depth keys, repeated once per cascade at that depth.
    all_depth_true = [[key] * len(depth_time1[key]) for key in depth_time1.keys()] #True
    all_depth_false = [[key] * len(depth_time2[key]) for key in depth_time2.keys()] #False
    all_depth_sum_true = []
    all_depth_sum_false = []

    for item in all_depth_true:
        all_depth_sum_true.extend(item)
    for item in all_depth_false:
        all_depth_sum_false.extend(item)

    #Depth CDF, CCDF
    #cdf = CDFPlot()
    #cdf.set_data(all_depth_sum_true, 'True')
    #cdf.set_data(all_depth_sum_false, 'False')
    #cdf.set_legends(['True', 'False'], '')
    #cdf.save_image('Image/depth_cdf.png')

    true_cascade = []
    false_cascade = []
    for postid in cascade_depth1.keys():
        for depth in cascade_depth1[postid].values(): #origin tweet : depth
            true_cascade.extend(depth)
 
    for postid in cascade_depth2.keys():
        for depth in cascade_depth2[postid].values(): #origin tweet : depth
            false_cascade.extend(depth)
   

    # Depth histogram (counts per depth 1..14) for both groups.
    print('true')
    for i in range(1, 15):
        print(i, true_cascade.count(i))
    print('false')
    for i in range(1, 15):
        print(i, false_cascade.count(i))
    
    cdf = CDFPlot()
    cdf.set_legends(['True', 'False'], '')
    cdf.set_xlim(0, 11)
    #cdf.set_log(True)
    #cdf.set_ylog()
    cdf.set_label('Depth', 'CDF')
    cdf.set_data(true_cascade, 'True')
    cdf.set_data(false_cascade, 'False')
    cdf.save_image('Image/depth_cdf.png')
Esempio n. 8
0
def velocity_change():
    """Plot per-cascade time-diff (velocity) curves for up to 101 echo chambers.

    Loads echo chamber membership from Data/echo_chamber2.json and writes one
    log-y line plot per cascade under Image/Velocity/, with vertical markers
    at the user index and published date.
    """
    with open('Data/echo_chamber2.json', 'r') as f:
        echo_chambers = json.load(f)

    processed = 0
    for chamber_key, users in echo_chambers.items():
        velocity_series, user_index, published_index = get_velocity_time_series(
            chamber_key, users, 'True,False,Mixture,Mostly False,Mostly True')

        for cascade_id, velocity in velocity_series.items():
            marker = user_index[cascade_id]
            published_date = published_index[cascade_id]
            plot = LinePlot()
            plot.set_ylog()
            plot.set_label('User', 'Time Diff')
            plot.set_axvline(marker, published_date)
            plot.set_plot_data(velocity, np.arange(1, len(velocity) + 1, 1))
            plot.save_image('Image/Velocity/velocity_change_line_%s_%s.png' %
                            (chamber_key, cascade_id))

        processed += 1
        if processed > 100:  # cap the number of echo chambers plotted
            break
Esempio n. 9
0
    return before_list, after_list, rapid_list


def trim_trends(trend_values):
    """Parse a stringified trend array like ``"[1 2 3]"`` into a list of ints.

    Strips square brackets and newlines, splits on whitespace, and converts
    each token to ``int``. Returns a concrete list: the previous ``map(...)``
    result is a one-shot iterator on Python 3, which breaks the repeated
    iteration (``np.array``, plotting) done by callers.

    Args:
        trend_values: string representation of a numeric array.

    Returns:
        list[int]: parsed values (empty list for an empty/blank string).
    """
    cleaned = trend_values.replace('[', '').replace(']', '')
    tokens = cleaned.replace('\n', '').split()
    return [int(token) for token in tokens]


if __name__ == '__main__':
    # Fetch every stored Google Trends series and plot them all together.
    conn, cursor = sql_connect()
    trend_values = [trim_trends(value) for value in get_all_trends()]
    plot = LinePlot()
    plot.set_label('days', 'trends')
    plot.set_plot_data(trend_values, 'Google Trends')
    plot.save_image('./image/trends_line.png')

    # Redraw using the per-day mean across all series.
    trend_values = np.array(trend_values)
    trend_mean = np.mean(trend_values, axis=0)
    print(trend_mean)
    trend_mean = trend_mean.tolist()
    print(trend_mean)
    plot = LinePlot()
    plot.set_label('days', 'trends')
    plot.set_plot_data(trend_mean, 'Google Trends')
def edge_homogeneity():
    """Compare edge homogeneity of echo-chamber vs non-echo-chamber users.

    Edge homogeneity here is the product of the polarity scores of a
    retweeting user and its parent tweet's user. The function reads per-post
    retweet trees from ``dir_name``, splits edges by whether the retweeter
    belongs to an echo chamber (Data/echo_chamber2.json), draws a CDF plot,
    dumps the raw values to Data/homogeneity.json, and draws a count-per-bin
    line plot.
    """
    files = os.listdir(dir_name)
    
    retweet_cache = {}
    homogeneity = []
    for ccc, postid in enumerate(files):
        #users_polarity[postid] = {}
        with open(dir_name  + '%s'%postid, 'r') as f:
            tweets = json.load(f)
            retweet_cache[postid] = tweets

        for tweet in tweets.values():
            p_score = get_polarity(tweet['user'])

            #calculate edge homogeneity
            if tweet['depth'] != 1:
                #compare with parents if parent is not root node 
                p_score2 = get_polarity(tweet['parent'])

                # -999 appears to be get_polarity's "unknown polarity"
                # sentinel -- confirm against its definition.
                if p_score == -999 or p_score2 == -999:
                    continue
                e = p_score * p_score2

                #print(p_score, p_score2, round(e, 1))
                homogeneity.append(round(e, 1))
        

    #    if ccc == 10:
    #        break

    #compare with echo chamber node's edge homogeneity
    echo_chamber_users = {}
    e_homogeneity = []
    ne_homogeneity = []
    with open('Data/echo_chamber2.json') as f:
        echo_chamber = json.load(f)

    # Build postid -> {user: 1} membership map; echo chamber keys are
    # '_'-joined post ids.
    for key in echo_chamber:
        users = echo_chamber[key]

        if len(users) < 1:
            continue

        for postid in key.split('_'):
            echo_chamber_users[postid] = echo_chamber_users.get(postid, {})
            for user in users:
                echo_chamber_users[postid][user] = 1 
 
    # NOTE(review): assumes every echo-chamber postid had a file in
    # ``files`` above; otherwise retweet_cache[postid] raises KeyError.
    for postid in echo_chamber_users.keys():
        tweets = retweet_cache[postid]

        for tweet in tweets.values():
            #echo chamber user's edge homogeneity
            if tweet['depth'] != 1:
                p_score = get_polarity(tweet['user'])
                p_score2 = get_polarity(tweet['parent'])

                if p_score == -999 or p_score2 == -999:
                    continue
            
                e = p_score * p_score2

                #print(p_score, p_score2, round(e, 1))
                if tweet['user'] in echo_chamber_users[postid].keys():
                    e_homogeneity.append(e)
                    #e_homogeneity.append(round(e, 1))
                else:
                    ne_homogeneity.append(e)
                    #ne_homogeneity.append(round(e, 1))


    draw_cdf_plot([e_homogeneity, ne_homogeneity], 'Homogenety', ['Echo Chambers', 'Non-Echo Chambers'], 'User type', 'homogeneity')

    with open('Data/homogeneity.json', 'w') as f:
        json.dump({'e':e_homogeneity, 'ne' : ne_homogeneity}, f)

    # Count exact occurrences of each rounded bin value in [-1.0, 1.0].
    # NOTE(review): e_homogeneity/ne_homogeneity hold UNROUNDED float
    # products, so .count(x) against rounded ticks may undercount -- the
    # commented-out round(e, 1) variants above suggest rounding was once
    # intended; verify.
    x_ticks = np.arange(-1,1.1, 0.1)
    x_ticks = np.around(x_ticks, decimals=1)
    e_count = []
    ne_count = []
    for x in x_ticks:
        e_count.append(e_homogeneity.count(x))
        ne_count.append(ne_homogeneity.count(x))
    line = LinePlot()
    line.set_ylog()
    line.set_label('Homogeneity', 'Number of Homogeneity')
    line.set_plot_data(e_count, x_ticks)
    line.set_plot_data(ne_count, x_ticks)
    line.set_legends(['Echo Chambers', 'Non-Echo Chambers'])
    line.set_xticks(x_ticks)
    line.save_image('Image/%s/homogeneity_line.png'%foldername)
Esempio n. 11
0
def draw_6_3_1_figures():
    """Draw Figure 6-3 plots: rumor/cascade counts per echo chamber rank.

    Loads precomputed rank series from Data/Figure/6_3.json and produces two
    log-y rank line plots plus two bar plots of cascade/retweet participation
    for top-ranked echo chambers.
    """
    with open('Data/Figure/6_3.json', 'r') as f:
        data = json.load(f)
        #json.dump({'rumor' : rumor_num, 'cascade_num' : cascade_num}, f)

    # Dataset-wide totals used as denominators below.
    all_cascade_num = 48644
    all_tweet_num = 310545
    all_retweet_count = 264653
    rumor_num = data['rumor']
    cascade_num = data['cascade_num']
    all_retweet_num = data['all_user']
    all_retweet_median_num = data['all_median']
    all_retweet_mean_num = data['all_mean']

    x_ticks = range(0, len(rumor_num))

    print('top 10% echo chamber ', len(rumor_num))
    #print(x_ticks)
    #print(rumor_num)
    line = LinePlot()
    line.set_ylog()
    line.set_label('Rank', 'Number of Rumors')
    yticks = [rumor_num[i] for i in x_ticks]
    line.set_plot_data(yticks, x_ticks)
    #line.set_yticks(['0', '1 m', '5 m', '1 h', '1 day', '10 day'], index=[0,1,5,60, 24*60, 24*10*60])
    line.save_image('Image/Figure/6_3_1.png')

    x_ticks = range(0, len(rumor_num))
    line = LinePlot()
    line.set_ylog()
    line.set_label('Rank', 'Number of Cascades')
    yticks = [cascade_num[i] for i in x_ticks]
    line.set_plot_data(yticks, x_ticks)
    #line.set_yticks(['0', '1 m', '5 m', '1 h', '1 day', '10 day'], index=[0,1,5,60, 24*60, 24*10*60])
    line.save_image('Image/Figure/6_3_2.png')

    print('all', len(x_ticks))
    #portion of cascades
    # NOTE(review): multipliers 0.01 / 0.1 / 0.5 correspond to 1% / 10% /
    # 50% of echo chambers, yet the bar labels below read '0.1%', '1%',
    # '5%' -- one of the two is wrong; confirm intended percentiles.
    echo_num = len(cascade_num)
    top_01 = int(echo_num * 0.01)
    top_1 = int(echo_num * 0.1)
    top_5 = int(echo_num * 0.5)
    top_10 = -1
    # NOTE(review): under Python 2, '/' on two ints truncates to 0 here --
    # confirm the file targets Python 3 (true division) or add float casts.
    top01_p = cascade_num[top_01] / all_cascade_num * 100
    top1_p = cascade_num[top_1] / all_cascade_num * 100
    top5_p = cascade_num[top_5] / all_cascade_num * 100
    top10_p = cascade_num[top_10] / all_cascade_num * 100
    #print(cascade_num)
    print(top01_p, top1_p, top5_p, top10_p)
    barplot = BarPlot(1)
    barplot.set_data([0, 1, 2, 3], [top01_p, top1_p, top5_p, top10_p], '')
    barplot.set_xticks(['0.1%', '1%', '5%', '10%'])
    #barplot.set_ylim(100)
    barplot.set_label('Hub Echo Chambers', 'Participation of Cascades (%)')
    barplot.save_image('Image/Figure/6_3_3.png')

    print(top_01, top_1, top_5)
    # Same ranks, but as a share of all retweets.
    top01_n = all_retweet_num[top_01] / all_retweet_count * 100
    top1_n = all_retweet_num[top_1] / all_retweet_count * 100
    top5_n = all_retweet_num[top_5] / all_retweet_count * 100
    top10_n = all_retweet_num[top_10] / all_retweet_count * 100
    print(all_retweet_num[top_10])
    print(top01_n, top1_n, top5_n, top10_n)
    barplot = BarPlot(1)
    barplot.set_multiple_data([top01_p, top1_p, top5_p, top10_p],
                              [top01_n, top1_n, top5_n, top10_n])
    barplot.set_xticks(['0.1%', '1%', '5%', '10%'])
    barplot.set_ylim(50)
    barplot.set_label('Hub Echo Chambers', 'Portion of Cascades (%)')
    #barplot.set_legends(['Cascade', 'Retweet'], '')
    barplot.save_image('Image/Figure/6_3_4.png')
    """
Esempio n. 12
0
def draw_5_2_1_figures():
    """Draw Figure 5-2-1: depth-increment time, echo vs non-echo chambers.

    Loads Data/Figure/5_2_1.json -- a list of two dicts mapping stringified
    depth -> list of minute values -- and plots the median time to reach each
    depth 1..17 on a log y-axis with human-readable time tick labels.
    """
    with open('Data/Figure/5_2_1.json', 'r') as f:
        data = json.load(f)

    x_ticks = range(1, 18)
    line = LinePlot()
    line.set_ylog()
    line.set_label('Depth', 'Depth Increment Time')
    print(len(data))
    # list(...) so append works on Python 3, where range() has no append.
    x_tickslabel = list(range(0, 17))
    x_tickslabel.append('')  # trailing blank keeps the tick count aligned
    for item in data:
        # JSON object keys are strings, hence str(depth).
        yticks = [np.median(item[str(depth)]) for depth in x_ticks]
        print(yticks)
        line.set_plot_data(yticks, x_tickslabel)
    line.set_legends(['Echo chamber', 'Non-echo chamber'])
    line.set_xticks(x_tickslabel)
    # Human-readable y labels at selected minute offsets.
    line.set_yticks(['0', '1 m', '5 m', '1 h', '1 day', '10 day'],
                    index=[0, 1, 5, 60, 24 * 60, 24 * 10 * 60])
    line.save_image('Image/Figure/5_2_1.png')