Beispiel #1
0
def plot_comments_count():
    """Plot the monthly number of comments: all, replies and direct comments.

    Loads three per-month series with ``load_obj`` and draws them against
    month labels running from 01-2008 to 06-2013 (66 labels). The figure is
    saved to ``charts/comment_frequency`` and then displayed.

    Presumably every loaded series holds one value per month label; verify
    against the code that produces them.
    """
    all_comments = load_obj('comments_count_by_month')
    reply = load_obj('comments_reply_count_by_month')
    noreply = load_obj('comments_noreply_count_by_month')

    # Month labels "MM-YYYY" from January 2008 through June 2013.
    dates = []
    for year in range(2008, 2014):
        for month in range(1, 13):
            if year == 2013 and month == 7:
                break
            # Zero-pad to two digits; a literal "0" prefix would turn
            # October-December into "010".."012".
            dates.append("{:02d}-{}".format(month, year))

    fig, ax = plt.subplots(figsize=(10, 6))
    plt.ylabel('Liczba komentarzy')
    plt.plot(dates, all_comments)
    plt.plot(dates, reply)
    plt.plot(dates, noreply)

    plt.legend(['wszystkie komentarze', 'odpowiedzi na komentarze', 'bezpośrednie komentarze do posta'], loc='upper left')
    plt.xticks(rotation=45)

    # Keep only every sixth tick label so the x axis stays readable.
    every_nth = 6
    for n, label in enumerate(ax.xaxis.get_ticklabels()):
        if n % every_nth != 0:
            label.set_visible(False)

    plt.savefig('charts/comment_frequency')
    plt.show()
Beispiel #2
0
def plot_popular_users_posts_avg(dict):
    """Plot the per-month post counts of highly vs. averagely popular users.

    Args:
        dict: Only used to build the output file name
            ``charts/<dict>_user_post_frequency``. The name shadows the
            builtin but is kept because it is part of the signature.

    Two series are loaded with ``load_obj`` and drawn against month labels
    from 01-2008 to 06-2013; the figure is saved and then displayed.
    Presumably each series holds one value per month label; verify against
    the producer.
    """
    big = load_obj('avg_posts_count_active_popularity')
    avg = load_obj('avg_posts_count_average_active_popularity')

    # Month labels "MM-YYYY" from January 2008 through June 2013.
    dates = []
    for year in range(2008, 2014):
        for month in range(1, 13):
            if year == 2013 and month == 7:
                break
            # Zero-pad to two digits; a literal "0" prefix would turn
            # October-December into "010".."012".
            dates.append("{:02d}-{}".format(month, year))

    fig, ax = plt.subplots(figsize=(10, 6))
    plt.title('Częstotliwość zameszczania postów')
    plt.ylabel('Liczba postów')
    plt.plot(dates, big)
    plt.plot(dates, avg)
    plt.legend(['duża popularność', 'średnia popularność'], loc='upper right')
    plt.xticks(rotation=45)

    # Keep only every sixth tick label so the x axis stays readable.
    every_nth = 6
    for n, label in enumerate(ax.xaxis.get_ticklabels()):
        if n % every_nth != 0:
            label.set_visible(False)

    plt.savefig('charts/{}_user_post_frequency'.format(dict))
    plt.show()
Beispiel #3
0
def create_post_user_measures_dict():
    """Build and save the per-author influence/popularity table.

    For every key of the loaded "authors" object that also appears in
    "user_popularity_all", stores a record with the result of
    ``calculate_user_influence`` and the loaded popularity value. The
    resulting mapping is saved under "post_user_measures".
    """
    authors = load_obj("authors")
    popularity = load_obj("user_popularity_all")
    measures = {}
    for author_id in authors:
        # Authors without a popularity entry are left out entirely.
        if author_id not in popularity:
            continue
        measures[author_id] = {
            "influence": calculate_user_influence(author_id),
            "popularity": popularity[author_id],
        }
    save_obj(measures, "post_user_measures")
Beispiel #4
0
def add_percentage_of_commented_posts_to_post_user_dict():
    """Annotate every (post author, commenter) pair with a percentage.

    Reads "post_user_dict" and "user_post", then for each commenter entry
    under a post author stores ``commented_posts / post_count * 100``
    (rounded to two decimals) under "percentage_of_commented_posts".
    The updated "user_post" object is saved back under the same name.
    """
    post_user_dict = load_obj("post_user_dict")
    user_post = load_obj("user_post")
    for post_author, commenters in user_post.items():
        post_count = post_user_dict[post_author]["post_count"]
        for relation in commenters.values():
            share = relation["commented_posts"] / post_count
            relation["percentage_of_commented_posts"] = round(share * 100, 2)
    save_obj(user_post, "user_post")
Beispiel #5
0
def add_percentage_of_commented_posts_to_dict():
    """Annotate every (commenter, post author) pair with a percentage.

    Reads "post_user_dict" and "user_relation", then for each post author
    entry under a commenter stores ``commented_posts / post_count * 100``
    (rounded to two decimals) under "percentage_of_commented_posts".
    The post count is taken from the post author's "post_user_dict" entry.
    The updated "user_relation" object is saved back under the same name.
    """
    post_user_dict = load_obj("post_user_dict")
    user_relation = load_obj("user_relation")
    for post_authors in user_relation.values():
        for post_user, relation in post_authors.items():
            post_count = post_user_dict[post_user]["post_count"]
            share = relation["commented_posts"] / post_count
            relation["percentage_of_commented_posts"] = round(share * 100, 2)
    save_obj(user_relation, "user_relation")
Beispiel #6
0
def user_first_interaction():
    """Save, per user, the earlier of their first comment and first post.

    Starts from the loaded "first_comment_date_by_user" values and replaces
    or adds an entry whenever "first_post_date_by_user" has a user that is
    missing or has a smaller value. The result is saved under
    "user_first_interaction_date".
    """
    comment = load_obj("first_comment_date_by_user")
    post = load_obj("first_post_date_by_user")

    users = {user: comment[user] for user in comment}
    for user in post:
        first_post = post[user]
        if user not in users or first_post < users[user]:
            users[user] = first_post

    save_obj(users, "user_first_interaction_date")
Beispiel #7
0
def create_user_relation_more_than_100_posts_dict():
    """Filter "user_relation" down to post authors with at least 100 posts.

    Every commenter keeps an entry (possibly empty). Under it, only post
    authors whose "post_count" in "post_user_dict" is >= 100 are retained,
    each carrying just "percentage_of_commented_posts". The result is saved
    under "user_relation_100_posts" and then printed with ``print_dict``.
    """
    user_relation = load_obj("user_relation")
    post_user_dict = load_obj("post_user_dict")

    filtered = {}
    for comment_author in user_relation:
        relations = user_relation[comment_author]
        filtered[comment_author] = {
            post_author: {
                "percentage_of_commented_posts":
                    relations[post_author]["percentage_of_commented_posts"],
            }
            for post_author in relations
            if post_user_dict[post_author]["post_count"] >= 100
        }

    save_obj(filtered, "user_relation_100_posts")
    print_dict(filtered)
Beispiel #8
0
def sowiniec_chart():
    """Plot the monthly comment-to-post ratio between two specific users.

    Loads four indexable per-month series with ``load_obj`` (comment counts
    in each direction and each user's post counts). For every index the
    ratio ``comments / posts`` is computed, using 0 when the post count is 0
    and capping the value at 1. Both ratio series are drawn against month
    labels from 01-2008 to 06-2013, saved to
    ``charts/sowiniec_godziemba.png`` and displayed.
    """
    s = load_obj("sowiniec_godziemba")
    g = load_obj("godziemba_sowiniec")
    sp = load_obj("sowiniec_posts")
    gp = load_obj("godziemba_posts")

    s_ratio = []
    g_ratio = []

    # Index-based on purpose: the loaded objects are only known to support
    # len() and integer indexing.
    for i in range(len(s)):
        s_value = s[i] / sp[i] if sp[i] != 0 else 0
        g_value = g[i] / gp[i] if gp[i] != 0 else 0
        # More comments than posts is clamped to a ratio of 1.
        s_ratio.append(min(s_value, 1))
        g_ratio.append(min(g_value, 1))

    # Month labels "MM-YYYY" from January 2008 through June 2013.
    dates = []
    for year in range(2008, 2014):
        for month in range(1, 13):
            if year == 2013 and month == 7:
                break
            # Zero-pad to two digits; a literal "0" prefix would turn
            # October-December into "010".."012".
            dates.append("{:02d}-{}".format(month, year))

    fig, ax = plt.subplots(figsize=(10, 6))
    plt.ylabel('Stosunek liczby komentarzy do liczby postów użytkownika')

    plt.plot(dates, s_ratio)
    plt.plot(dates, g_ratio)

    plt.legend(['komentarze otrzymane przez Sowińca od Godziemby', 'komentarze otrzymane przez Godziembę od Sownińca'],
               loc='lower right')
    plt.xticks(rotation=45)

    # Keep only every sixth tick label so the x axis stays readable.
    every_nth = 6
    for n, label in enumerate(ax.xaxis.get_ticklabels()):
        if n % every_nth != 0:
            label.set_visible(False)

    plt.savefig('charts/sowiniec_godziemba.png')
    plt.show()
Beispiel #9
0
def plot_user_relation_hist(id):
    """Draw a 20-bin histogram of commented-post percentages for one user.

    Args:
        id: Key into both the loaded "authors" and "user_post_100" objects.
            The name shadows the builtin but is part of the signature.

    The histogram is saved to ``charts/user_relation_<author>`` where
    ``<author>`` is ``authors[id]``, and then displayed.
    """
    authors = load_obj("authors")
    relations = load_obj('user_post_100')[id]
    percentages = [
        relations[other]['percentage_of_commented_posts']
        for other in relations
    ]

    bin_count = 20
    author_name = authors[id]
    plt.title("Histogram - % skomentowanych postów - {}".format(author_name))
    plt.xlabel("% skomentowanych postów")
    plt.ylabel("Liczba użytkowników")
    plt.hist(percentages, bin_count, facecolor='blue', alpha=0.5)
    plt.savefig('charts/user_relation_{}'.format(author_name))
    plt.show()
Beispiel #10
0
def calculate_user_influence(user_id):
    """Compute, print and return the influence score of one user.

    The score is the mean of two ratios read from the user's
    "post_user_dict" entry, rounded to four decimals:
    ``unique_users_counter / followers_10`` (0 when ``followers_10`` is 0)
    and ``all_inter_sum / post_count``.

    Args:
        user_id: Key into the loaded "post_user_dict" and "authors" objects.

    Returns:
        The rounded score, or ``None`` when ``user_id`` has no
        "post_user_dict" entry.
    """
    post_user_dict = load_obj("post_user_dict")
    authors = load_obj("authors")

    if user_id not in post_user_dict:
        return None

    stats = post_user_dict[user_id]
    followers = stats['followers_10']
    ir = stats['unique_users_counter'] / followers if followers != 0 else 0
    rmr = stats['all_inter_sum'] / stats['post_count']
    snp = round((ir + rmr) / 2, 4)

    # Printed as "<author> & <score> &".
    print("{} & {} &".format(authors[user_id], snp))
    return snp
Beispiel #11
0
def plot_popularity_max_and_avg(dict):
    """Plot the monthly maximum and mean 'popularity_weight' on a log scale.

    Args:
        dict: Name passed to ``load_obj``; also used in the output file name
            ``charts/<dict>_max_avg``. The name shadows the builtin but is
            part of the signature.

    The loaded object is indexed with 0..65 (one entry per month label from
    01-2008 to 06-2013); each entry must be a non-empty mapping whose values
    carry a 'popularity_weight'.
    """
    max_vals = []
    avg_vals = []
    monthly = load_obj(dict)
    for i in range(66):
        weights = [user['popularity_weight'] for user in monthly[i].values()]
        max_vals.append(max(weights))
        avg_vals.append(sum(weights) / len(weights))

    # Month labels "MM-YYYY" from January 2008 through June 2013.
    dates = []
    for year in range(2008, 2014):
        for month in range(1, 13):
            if year == 2013 and month == 7:
                break
            # Zero-pad to two digits; a literal "0" prefix would turn
            # October-December into "010".."012".
            dates.append("{:02d}-{}".format(month, year))

    fig, ax = plt.subplots(figsize=(10, 6))
    plt.title('Średnia i maksymalna wartość miary popularności w zależności od miesiąca')
    plt.ylabel('Popularność')
    plt.plot(dates, max_vals)
    plt.plot(dates, avg_vals)
    plt.legend(['max', 'średnia'], loc='upper right')
    # NOTE(review): newer Matplotlib releases spell this keyword
    # `nonpositive` - confirm against the installed version.
    plt.yscale('log', nonposy='clip')
    plt.xticks(rotation=45)

    # Keep only every sixth tick label so the x axis stays readable.
    every_nth = 6
    for n, label in enumerate(ax.xaxis.get_ticklabels()):
        if n % every_nth != 0:
            label.set_visible(False)

    plt.savefig('charts/{}_max_avg'.format(dict))
    plt.show()
Beispiel #12
0
def find_max_interaction_of_post_user():
    """Print, per post author, the result of ``find_max_val_and_key``.

    ``find_max_val_and_key`` is applied to each entry of the loaded
    "user_post" object; its result is used as a (key, value) pair. Authors
    are listed in descending order of that value, one line each in the form
    ``<author> & <other author> & <value>\\\\`` with names looked up in the
    loaded "authors" object.
    """
    post_author = load_obj("user_post")
    names = load_obj("authors")

    best = {
        user: find_max_val_and_key(post_author[user])
        for user in post_author
    }

    def _value(user):
        return best[user][1]

    for user in sorted(best, key=_value, reverse=True):
        print("{} & {} & {}\\\\".format(
            names[user], names[best[user][0]], best[user][1]))
Beispiel #13
0
def return_sorted_relation_user_post_author(user_id):
    """Print one user's relations sorted by commented-post percentage.

    Takes the ``user_id`` entry of the loaded "user_post" object, orders its
    (key, value) pairs by 'percentage_of_commented_posts' from highest to
    lowest and hands the list to ``print_dict``. Nothing is returned.
    """
    relations = load_obj("user_post")[user_id]

    def _percentage(item):
        return item[1]['percentage_of_commented_posts']

    ranked = list(relations.items())
    ranked.sort(key=_percentage, reverse=True)
    print_dict(ranked)
Beispiel #14
0
def plot_users_by_month():
    """Plot the loaded "users_by_month" series against month labels.

    Labels run from 01-2008 to 06-2013. The lengths of the label list and of
    the loaded series are printed, as are the tick labels, before the figure
    is saved to ``charts/users_by_month.png`` and displayed.
    """
    # Month labels "MM-YYYY" from January 2008 through June 2013.
    dates = []
    for year in range(2008, 2014):
        for month in range(1, 13):
            if year == 2013 and month == 7:
                break
            # Zero-pad to two digits; a literal "0" prefix would turn
            # October-December into "010".."012".
            dates.append("{:02d}-{}".format(month, year))
    print(len(dates))
    users = load_obj("users_by_month")
    print(len(users))
    fig, ax = plt.subplots(figsize=(10, 6))
    plt.ylabel("Liczba użytkowników")
    plt.xticks(rotation=45)
    plt.plot(dates, users)
    print(ax.xaxis.get_ticklabels())

    # Keep only every sixth tick label so the x axis stays readable.
    every_nth = 6
    for n, label in enumerate(ax.xaxis.get_ticklabels()):
        print(label)
        if n % every_nth != 0:
            label.set_visible(False)

    plt.savefig("charts/users_by_month.png")
    plt.show()
Beispiel #15
0
def plot_popularuty_histogram():
    """Show a 100-bin, log-scaled histogram of user popularity values.

    Values come from the loaded "user_popularity_without_comment_replies"
    mapping, are converted to float, and anything below 0.01 is dropped.
    The figure is only displayed, not saved.
    """
    user_popularity = load_obj("user_popularity_without_comment_replies")
    bin_count = 100
    kept = [
        value
        for value in map(float, user_popularity.values())
        if value >= 0.01
    ]
    plt.title("Popularność użytkowników - histogram")
    plt.xlabel("Popularity")
    plt.ylabel("Liczba użytkowników")
    plt.yscale('log', nonposy='clip')
    plt.hist(kept, bin_count, facecolor='blue', alpha=0.5)
    plt.show()
Beispiel #16
0
def plot_relation_count():
    """Draw a log-scaled bar chart of the "comment_relation_count" mapping.

    Keys become bar positions (and x ticks), values become bar heights, and
    each bar is annotated with its height. The figure is saved to
    ``charts/comment_relation_count`` and displayed.
    """
    counts = load_obj("comment_relation_count")
    plt.yscale('log', nonposy='clip')

    levels = list(counts.keys())
    bars = plt.bar(levels, counts.values(), color='g')
    plt.gca().set_xticks(levels)

    plt.title("Liczba relacji między użytkownikami wg podziału 0-10")
    plt.xlabel("Poziom relacji")
    plt.ylabel("Liczba relacji")

    # Write each bar's height just above it, shifted 0.4 from its left edge.
    for bar in bars:
        height = bar.get_height()
        plt.text(bar.get_x() + 0.4, height + 0.005, height, ha='center')

    plt.savefig("charts/comment_relation_count")
    plt.show()
Beispiel #17
0
def plot_influence_count():
    """Plot how many users fall into each influence band per month.

    The loaded "influence_rmi_by_month" object is indexed with 0..65 (one
    entry per month label from 01-2008 to 06-2013). For each month, users
    are counted by their 'infuence_w' value:

    * high:   value >= 15
    * medium: 5 <= value < 15
    * low:    2 <= value < 5

    Values below 2 are not counted. The three series are plotted, saved to
    ``charts/influence_count.png`` and displayed; afterwards the mean count
    of each band is printed (low, medium, high).
    """
    data = load_obj("influence_rmi_by_month")
    weak_p = []
    avg_p = []
    big_p = []
    for i in range(66):
        month_data = data[i]
        c_w = c_a = c_b = 0
        for user in month_data:
            # 'infuence_w' (sic) is the key used by the stored data.
            influence = month_data[user]['infuence_w']
            if influence >= 15:
                c_b += 1
            elif influence >= 5:
                c_a += 1
            elif influence >= 2:
                c_w += 1
        weak_p.append(c_w)
        avg_p.append(c_a)
        big_p.append(c_b)

    # Month labels "MM-YYYY" from January 2008 through June 2013.
    dates = []
    for year in range(2008, 2014):
        for month in range(1, 13):
            if year == 2013 and month == 7:
                break
            # Zero-pad to two digits; a literal "0" prefix would turn
            # October-December into "010".."012".
            dates.append("{:02d}-{}".format(month, year))

    fig, ax = plt.subplots(figsize=(10, 6))
    plt.ylabel('Liczba użytkowników')
    plt.plot(dates, weak_p)
    plt.plot(dates, avg_p)
    plt.plot(dates, big_p)
    plt.legend(['słaba wpływowość', 'średnia wpływowość', 'duża wpływowość'], loc='upper left')
    plt.xticks(rotation=45)

    # Keep only every sixth tick label so the x axis stays readable.
    every_nth = 6
    for n, label in enumerate(ax.xaxis.get_ticklabels()):
        if n % every_nth != 0:
            label.set_visible(False)

    plt.savefig('charts/influence_count.png')
    plt.show()

    print(sum(weak_p) / len(weak_p))
    print(sum(avg_p) / len(avg_p))
    print(sum(big_p) / len(big_p))
Beispiel #18
0
def plot_influence_hist():
    """Draw a 200-bin, log-scaled histogram of the "influence_static" values.

    Every value of the loaded mapping goes into the histogram. The figure is
    saved to ``charts/influence_static`` and displayed.
    """
    influence_by_user = load_obj("influence_static")
    values = [influence_by_user[user] for user in influence_by_user]

    plt.title("Histogram - wpływowość użytkowników")
    plt.xlabel("Miara wpływowości")
    plt.ylabel("Liczba użytkowników")
    plt.yscale('log', nonposy='clip')
    plt.hist(values, 200, facecolor='blue', alpha=0.5)
    plt.savefig('charts/influence_static')
    plt.show()
Beispiel #19
0
def plot_influence_histogram():
    """Show a 100-bin, log-scaled histogram of users' "influence" values.

    Values are read from the loaded "post_user_measures" mapping; entries
    whose "influence" is ``None`` are skipped. The collected list is printed
    before plotting. The figure is only displayed, not saved.
    """
    post_user_measures = load_obj("post_user_measures")
    bin_count = 100
    influence = [
        post_user_measures[user]["influence"]
        for user in post_user_measures
        if post_user_measures[user]["influence"] is not None
    ]
    print(influence)
    plt.title("Wpływowość użytkowników - histogram")
    plt.xlabel("Influence")
    plt.ylabel("Liczba użytkowników")
    plt.yscale('log', nonposy='clip')
    plt.hist(influence, bin_count, facecolor='blue', alpha=0.5)
    plt.show()
Beispiel #20
0
def plot_popularity_count(dict):
    """Plot how many users fall into each popularity band per month.

    Args:
        dict: Name passed to ``load_obj``; also used in the output file name
            ``charts/<dict>_user_count``. The name shadows the builtin but
            is part of the signature.

    The loaded object is indexed with 0..65 (one entry per month label from
    01-2008 to 06-2013). For each month, users are counted by their
    'popularity_weight':

    * high:   weight >= 0.3
    * medium: 0.12 <= weight < 0.3
    * low:    0.05 <= weight < 0.12

    Weights below 0.05 are not counted. The mean count of each band is
    printed (low, medium, high) before the figure is saved and displayed.
    """
    weak_p = []
    avg_p = []
    big_p = []
    monthly = load_obj(dict)
    for i in range(66):
        month_data = monthly[i]
        c_w = c_a = c_b = 0
        for user in month_data:
            weight = month_data[user]['popularity_weight']
            if weight >= 0.3:
                c_b += 1
            elif weight >= 0.12:
                c_a += 1
            elif weight >= 0.05:
                c_w += 1
        weak_p.append(c_w)
        avg_p.append(c_a)
        big_p.append(c_b)

    # Month labels "MM-YYYY" from January 2008 through June 2013.
    dates = []
    for year in range(2008, 2014):
        for month in range(1, 13):
            if year == 2013 and month == 7:
                break
            # Zero-pad to two digits; a literal "0" prefix would turn
            # October-December into "010".."012".
            dates.append("{:02d}-{}".format(month, year))

    fig, ax = plt.subplots(figsize=(10, 6))
    plt.ylabel('Liczba użytkowników')
    plt.plot(dates, weak_p)
    plt.plot(dates, avg_p)
    plt.plot(dates, big_p)
    plt.legend(['słaba popularność', 'średnia popularność', 'duża popularność'], loc='upper left')
    plt.xticks(rotation=45)

    print(sum(weak_p) / len(weak_p))
    print(sum(avg_p) / len(avg_p))
    print(sum(big_p) / len(big_p))

    # Keep only every sixth tick label so the x axis stays readable.
    every_nth = 6
    for n, label in enumerate(ax.xaxis.get_ticklabels()):
        if n % every_nth != 0:
            label.set_visible(False)

    plt.savefig('charts/{}_user_count'.format(dict))
    plt.show()
Beispiel #21
0
def plot_user_interactions_histogram():
    """Draw a 200-bin, log-scaled histogram of per-user entry counts.

    Every value of the loaded "all_users_posts_comment_count" mapping goes
    into the histogram. The figure is saved to ``charts/user_interactions``
    and displayed.
    """
    count_by_user = load_obj("all_users_posts_comment_count")
    counts = [count_by_user[user] for user in count_by_user]

    plt.title("Histogram - liczba wpisów użytkowników")
    plt.xlabel("Liczba wpisów")
    plt.ylabel("Liczba użytkowników")
    plt.yscale('log', nonposy='clip')
    plt.hist(counts, 200, facecolor='blue', alpha=0.5)
    plt.savefig('charts/user_interactions')
    plt.show()
Beispiel #22
0
def plot_active_popular_users_weights(dict):
    """Plot the monthly 'popularity_weight' of five hard-coded users.

    Args:
        dict: Name passed to ``load_obj``. The name shadows the builtin but
            is part of the signature.

    The loaded object is indexed with 0..65 (one entry per month label from
    01-2008 to 06-2013). A user missing from a month contributes 0 for that
    month. The month index is printed as each month is processed. The figure
    is saved to ``charts/all_weighted_fix.png`` (the file name does not
    depend on ``dict``) and displayed.
    """
    # Month labels "MM-YYYY" from January 2008 through June 2013.
    dates = []
    for year in range(2008, 2014):
        for month in range(1, 13):
            if year == 2013 and month == 7:
                break
            # Zero-pad to two digits; a literal "0" prefix would turn
            # October-December into "010".."012".
            dates.append("{:02d}-{}".format(month, year))
    fig, ax = plt.subplots(figsize=(10, 6))

    plt.ylabel("Popularność")
    plt.xticks(rotation=45)

    # Order matters: lines are plotted in this order and the legend below
    # lists the names in the same order.
    all_time_popular_ids = [657, 783, 11926, 675, 1991]
    popular_users = {user_id: [] for user_id in all_time_popular_ids}

    # Loaded into its own name instead of rebinding the `dict` parameter.
    monthly = load_obj(dict)
    for i in range(66):
        print(i)
        active_popularity = monthly[i]
        for user_id in all_time_popular_ids:
            if user_id in active_popularity:
                popular_users[user_id].append(
                    active_popularity[user_id]['popularity_weight'])
            else:
                popular_users[user_id].append(0)

    for user_id in popular_users:
        plt.plot(dates, popular_users[user_id], linewidth=0.7)

    plt.legend(['FREE YOUR MIND', 'RENATA RUDECKA-KALINOWSKA', 'KRZYSZTOF LESKI', 'CEZARY KRYSZTOPA', 'SOWINIEC'],
               loc='upper right')

    # Keep only every sixth tick label so the x axis stays readable.
    every_nth = 6
    for n, label in enumerate(ax.xaxis.get_ticklabels()):
        if n % every_nth != 0:
            label.set_visible(False)

    plt.savefig("charts/{}_weighted_fix.png".format("all"))
    plt.show()
Beispiel #23
0
def calculate_influence():
    """Rank every user in "post_user_dict" by influence score.

    The score is the mean of two ratios from the user's entry, rounded to
    four decimals: ``unique_users_counter / followers_20`` (0 when
    ``followers_20`` is 0) and ``all_inter_sum / post_count``.

    Returns:
        A pair of tuples ``(scores, users)`` sorted by score in descending
        order, ties broken by user in descending order. Both tuples are
        empty when "post_user_dict" has no entries.
    """
    post_user_dict = load_obj("post_user_dict")
    scored = []
    for user in post_user_dict:
        stats = post_user_dict[user]
        followers = stats['followers_20']
        ir = stats['unique_users_counter'] / followers if followers != 0 else 0
        rmr = stats['all_inter_sum'] / stats['post_count']
        scored.append((round((ir + rmr) / 2, 4), user))

    # zip(*[]) yields nothing to unpack, so handle the empty case explicitly
    # instead of failing with ValueError.
    if not scored:
        return (), ()

    scored.sort(reverse=True)
    list1, list2 = zip(*scored)
    return list1, list2
Beispiel #24
0
 def create_huge_dictionary3(self):
     """Append comment rows from ``self.records`` to the stored "huge_dict".

     Each row supplies an author id, a date and an interacted-with value at
     positions 0, 1 and 2. Unknown authors get a fresh record with empty
     "posts" and "commnets" sections; the row is then appended to the
     author's "commnets" lists with an interaction count of 0. The updated
     object is saved back under "huge_dict".
     """
     huge = load_obj("huge_dict")
     for row in self.records:
         author_id, date, interacted_with = row[0], row[1], row[2]
         if author_id not in huge:
             # "commnets" (sic) is the key used by the stored data.
             huge[author_id] = {
                 "posts": {"dates": [], "interactions": []},
                 "commnets": {
                     "dates": [],
                     "interactedWith": [],
                     "interactions": [],
                 },
             }
         comments = huge[author_id]["commnets"]
         comments["dates"].append(date)
         comments["interactedWith"].append(interacted_with)
         comments["interactions"].append(0)
     save_obj(huge, "huge_dict")
Beispiel #25
0
def plot_post_count_by_month():
    """Plot the monthly post counts of five hard-coded users.

    For each month index 0..65 (one per month label from 01-2008 to
    06-2013) the object "post_count_by_month_<index>" is loaded; a user
    missing from it contributes 0 for that month. The month index is printed
    as each month is processed. The figure is saved to
    ``charts/popular_users_post_by_month.png`` and displayed.
    """
    # Month labels "MM-YYYY" from January 2008 through June 2013.
    dates = []
    for year in range(2008, 2014):
        for month in range(1, 13):
            if year == 2013 and month == 7:
                break
            # Zero-pad to two digits; a literal "0" prefix would turn
            # October-December into "010".."012".
            dates.append("{:02d}-{}".format(month, year))
    fig, ax = plt.subplots(figsize=(10, 6))
    plt.title("Liczba postów zamieszczonych w ciągu miesiąca przez danego użytkownika")
    plt.ylabel("Liczba postów")
    plt.xticks(rotation=45)

    # Order matters: lines are plotted in this order and the legend below
    # lists the names in the same order.
    all_time_popular_ids = [657, 11926, 675, 440, 783]
    popular_users_post_count = {
        user_id: [] for user_id in all_time_popular_ids
    }

    for i in range(66):
        print(i)
        post_count = load_obj("post_count_by_month_{}".format(i))
        for user_id in all_time_popular_ids:
            if user_id in post_count:
                popular_users_post_count[user_id].append(post_count[user_id])
            else:
                popular_users_post_count[user_id].append(0)

    for user_id in popular_users_post_count:
        plt.plot(dates, popular_users_post_count[user_id], linewidth=0.7)

    plt.legend(['FREE YOUR MIND', 'KRZYSZTOF LESKI', 'CEZARY KRYSZTOPA', 'MAREK MIGALSKI', 'RENATA RUDECKA-KALINOWSKA'],
               loc='upper right')

    # Keep only every sixth tick label so the x axis stays readable.
    every_nth = 6
    for n, label in enumerate(ax.xaxis.get_ticklabels()):
        if n % every_nth != 0:
            label.set_visible(False)

    plt.savefig("charts/popular_users_post_by_month.png")
    plt.show()
Beispiel #26
0
def plot_popularity_histograms(dict):
    """Draw one log-scaled popularity histogram per year, 2009 through 2012.

    Args:
        dict: Name passed to ``load_obj``; also used in the output file
            names ``charts/<dict>_<year>``. The name shadows the builtin but
            is part of the signature.

    Entries 12, 24, 36 and 48 of the loaded object are used for 2009-2012
    (the titles label them as January of each year). Each histogram has 100
    bins over the users' 'popularity_weight' values converted to float, and
    is saved and then displayed before the next one is drawn.
    """
    monthly = load_obj(dict)

    popularity_dicts = {2009: monthly[12],
                        2010: monthly[24],
                        2011: monthly[36],
                        2012: monthly[48]}

    num_bins = 100

    for year, users in popularity_dicts.items():
        plt.title("Histogram - popularność - styczeń {}".format(year))
        plt.xlabel("Popularity")
        plt.ylabel("Liczba użytkowników")
        # NOTE(review): newer Matplotlib releases spell this keyword
        # `nonpositive` - confirm against the installed version.
        plt.yscale('log', nonposy='clip')
        popularity = [
            float(users[user]['popularity_weight']) for user in users
        ]
        plt.hist(popularity, num_bins, facecolor='blue', alpha=0.5)
        plt.savefig('charts/{}_{}'.format(dict, year))
        plt.show()
Beispiel #27
0
def plot_active_users_by_month():
    """Plot the loaded "active_users_by_month" series against month labels.

    Labels run from 01-2008 to 06-2013. The tick labels are printed before
    the figure is saved to ``charts/active_users_by_month.png`` and
    displayed.
    """
    # Month labels "MM-YYYY" from January 2008 through June 2013.
    dates = []
    for year in range(2008, 2014):
        for month in range(1, 13):
            if year == 2013 and month == 7:
                break
            # Zero-pad to two digits; a literal "0" prefix would turn
            # October-December into "010".."012".
            dates.append("{:02d}-{}".format(month, year))
    users_count = load_obj("active_users_by_month")
    fig, ax = plt.subplots(figsize=(10, 6))
    plt.ylabel("Liczba użytkowników")
    plt.xticks(rotation=45)
    plt.plot(dates, users_count)
    print(ax.xaxis.get_ticklabels())

    # Keep only every sixth tick label so the x axis stays readable.
    every_nth = 6
    for n, label in enumerate(ax.xaxis.get_ticklabels()):
        print(label)
        if n % every_nth != 0:
            label.set_visible(False)

    plt.savefig("charts/active_users_by_month.png")
    plt.show()