def profile_with_max_views(limit=20):
    """Print the profiles with the highest view counts.

    Queries ``Profile`` ordered by ``views`` descending, capped at *limit*
    rows, and prints one line per profile: its URL (``PROFILE_URL``
    formatted with the profile's ``uname``) followed by its view count.

    Args:
        limit: Maximum number of profiles to list (default 20).
    """
    profiles = Profile.select().order_by(Profile.views.desc()).limit(limit)
    # The ranking is by views, so the header must say so; it previously
    # announced "most answers", contradicting the rows printed below it.
    print("Top %d Writers with most views - " % limit)
    for profile in profiles:
        u = PROFILE_URL.format(profile.uname)
        print("{0} ({1} views)".format(u, profile.views))
def profile_with_max_followers(limit=20):
    """Print the most-followed profiles.

    Builds a ``Profile`` query sorted by ``followers`` (highest first) and
    capped at *limit* rows, prints a header, then prints each profile's URL
    and follower count on its own line.

    Args:
        limit: Maximum number of profiles to list (default 20).
    """
    by_followers = Profile.select().order_by(Profile.followers.desc())
    most_followed = by_followers.limit(limit)

    print('Top %d Most followed - ' % limit)
    for entry in most_followed:
        link = PROFILE_URL.format(entry.uname)
        row = '{0} ({1} followers)'.format(link, entry.followers)
        print(row)
def profile_with_max_followers(limit=20):
    """List the profiles that have the most followers.

    The ``Profile`` table is queried in descending ``followers`` order and
    truncated to *limit* rows. A header line is printed first, followed by
    one "<profile URL> (<n> followers)" line per profile.

    Args:
        limit: Maximum number of profiles to list (default 20).
    """
    row_template = "{0} ({1} followers)"
    ordering = Profile.followers.desc()
    top_rows = Profile.select().order_by(ordering).limit(limit)

    print("Top %d Most followed - " % limit)
    for person in top_rows:
        print(row_template.format(PROFILE_URL.format(person.uname),
                                  person.followers))
def profile_with_max_views(limit=20):
    """Print the profiles with the highest view counts.

    Queries ``Profile`` ordered by ``views`` descending, capped at *limit*
    rows, and prints one line per profile: its URL (``PROFILE_URL``
    formatted with the profile's ``uname``) followed by its view count.

    Args:
        limit: Maximum number of profiles to list (default 20).
    """
    profiles = Profile.select().order_by(Profile.views.desc()).limit(limit)
    # The ranking is by views, so the header must say so; it previously
    # announced "most answers", contradicting the rows printed below it.
    print('Top %d Writers with most views - ' % limit)
    for profile in profiles:
        u = PROFILE_URL.format(profile.uname)
        print('{0} ({1} views)'.format(u, profile.views))
def profile_with_max_edits(limit=20):
    """Report and chart the profiles with the most edits.

    Selects up to *limit* ``Profile`` rows ordered by ``edits`` descending,
    prints each profile's URL and edit count, then the total, the per-user
    average and how many of the listed users appear in
    ``top_writers_2016.json``. Finally saves a bar chart of the edit counts
    (with the average drawn as a horizontal line) to ``most_posts.png``.

    Args:
        limit: Maximum number of profiles to include (default 20).
    """
    # Materialise the rows once so len() and the repeated passes below all
    # work on the same plain list.
    profiles = list(
        Profile.select().order_by(Profile.edits.desc()).limit(limit))
    if not profiles:
        # Nothing to average or plot; avoids a ZeroDivisionError below.
        print("No profiles found")
        return

    # Top Writer
    with open('top_writers_2016.json', 'r') as fstream:
        writer_list = json.load(fstream)
    tw = set(writer['uname'] for writer in writer_list)

    top_writer_count = 0
    total_edits = 0
    for profile in profiles:
        u = PROFILE_URL.format(profile.uname)
        # Report the same ``edits`` field the query sorts by and the chart
        # plots; reading ``posts`` here made the totals and the average
        # line disagree with the bars.
        print(u'{0} ({1} edits)'.format(u, profile.edits))
        total_edits += profile.edits
        if profile.uname in tw:
            top_writer_count += 1

    # With integer operands this floors under Python 2; the value is only
    # printed with %d and used as the height of the average line.
    avg_edits = total_edits / len(profiles)
    print("Total number of edits is %d" % total_edits)
    print("Average number of edits by a single user is %d" % avg_edits)
    print("Number on Top Writers 2016 on this list is %d" % top_writer_count)

    # Visualisation
    positions = range(len(profiles))
    right_edge = len(profiles) - 0.5
    fig = plt.figure(figsize=(21, 14))
    plt.title("Users with Most Edits on Quora")
    ydata = [prof.edits for prof in profiles]
    xticks = [prof.name for prof in profiles]
    plt.xlabel('User')
    plt.ylabel('Edits Made by User')
    plt.bar(positions, ydata, align='center')
    plt.xticks(positions, xticks, rotation=90)
    plt.hlines(avg_edits, -0.5, right_edge, label='Average', colors='red')
    plt.xlim([-0.5, right_edge])
    fig.tight_layout()
    # NOTE(review): the output name still says "posts"; it is kept so that
    # anything consuming this file keeps working -- confirm whether it
    # should be renamed.
    plt.savefig('most_posts.png', facecolor='white', edgecolor='black')
create_directory(PROFILE_FOLDER) for writer in writer_list: new = Profile.create_or_get(uname=writer['uname'], name=writer['name'])[1] if new: print(u'New Profile %s Created' % writer['uname']) #print "Number of Writers Added = ", len(writer_list) # Starting to Crawl total_parsing = 0 max_crawl = args.max_crawl while total_parsing < max_crawl: if not args.no_profile: # Parse Old Profiles old_time = datetime.datetime.now() - datetime.timedelta(days=7) old_profiles = Profile.select().where( Profile.last_parsed <= old_time).limit(max_crawl - total_parsing) total_parsing += len(old_profiles) print "Number of Profiles to Crawl - ", len(old_profiles) for profile in old_profiles: try: parse_profile(profile) except Exception, err: #pylint-diasble: print err traceback.print_exc(file=sys.stdout) pass if total_parsing >= max_crawl: break if not args.no_answer: # Parse Old Answers old_time = datetime.datetime.now() - datetime.timedelta(days=7)
writer_list += json.load(fstream) create_directory(ANSWERS_FOLDER) create_directory(PROFILE_FOLDER) for writer in writer_list: new = Profile.create_or_get(uname=writer['uname'], name=writer['name'])[1] if new: print(u'New Profile %s Created' % writer['uname']) #print "Number of Writers Added = ", len(writer_list) # Starting to Crawl total_parsing = 0 max_crawl = args.max_crawl while total_parsing < max_crawl: if not args.no_profile: # Parse Old Profiles old_time = datetime.datetime.now() - datetime.timedelta(days=7) old_profiles = Profile.select().where( Profile.last_parsed <= old_time).limit(max_crawl - total_parsing) total_parsing += len(old_profiles) print "Number of Profiles to Crawl - ", len(old_profiles) for profile in old_profiles: try: parse_profile(profile) except Exception, err: #pylint-diasble: print err traceback.print_exc(file=sys.stdout) pass if total_parsing >= max_crawl: break if not args.no_answer: # Parse Old Answers old_time = datetime.datetime.now() - datetime.timedelta(days=7) old_answers = Answer.select().where(
def profile_with_max_followers(limit=20):
    """Report and chart the most-followed profiles.

    Selects up to *limit* ``Profile`` rows ordered by ``followers``
    descending, prints each profile's URL and follower count, then prints
    aggregate statistics (views, followers, following, answers, and how many
    of the listed users appear in ``top_writers_2016.json``). Two charts are
    saved:

    * ``most_followed_users.png`` -- followers per profile plus the average.
    * ``user_metric.png`` -- followers/following and followers/answers
      ratios for the profiles that pass the filter described below.

    Args:
        limit: Maximum number of profiles to include (default 20).
    """
    # Materialise the rows once so len() and the repeated passes below all
    # work on the same plain list.
    profiles = list(
        Profile.select().order_by(Profile.followers.desc()).limit(limit))
    if not profiles:
        # Nothing to average or plot; avoids a ZeroDivisionError below.
        print("No profiles found")
        return

    # Top Writer
    with open('top_writers_2016.json', 'r') as fstream:
        writer_list = json.load(fstream)
    tw = set(writer['uname'] for writer in writer_list)

    total_views = 0
    total_followers = 0
    total_following = 0
    total_answers = 0
    top_writer_count = 0
    print('Top %d Most followed - ' % limit)
    for profile in profiles:
        u = PROFILE_URL.format(profile.uname)
        print(u'{0} ({1} followers)'.format(u, profile.followers))
        # Each profile's views are counted exactly once; adding them twice
        # per profile doubled every view-based statistic.
        total_views += profile.views
        total_followers += profile.followers
        total_following += profile.following
        total_answers += profile.total_answers
        if profile.uname in tw:
            top_writer_count += 1

    count = len(profiles)
    # With integer operands these floor under Python 2; the values are
    # only printed with %d or used as the height of an average line.
    avg_followers = total_followers / count
    avg_following = total_following / count
    avg_answers = total_answers / count
    avg_views = total_views / count
    # Guard the ratios: the totals can be zero (e.g. all counters are 0).
    avg_v_gain_f = total_views / total_followers if total_followers else 0
    avg_f_gain_ans = total_followers / total_answers if total_answers else 0
    print("Total number of all-time views is %d" % total_views)
    print("Average number of all-time views is %d" % avg_views)
    print("Average number of followers is %d" % avg_followers)
    print("Average number of views gained per follower is %d" % avg_v_gain_f)
    print("Total number of answers written by them is %d" % total_answers)
    print("Average number of answers written by each is %d" % avg_answers)
    print("Average number of people these writers follow is %d"
          % avg_following)
    print("Average number of followers gained per answer is %d"
          % avg_f_gain_ans)
    print("Number on Top Writers 2016 is %d" % top_writer_count)

    # Chart 1: followers per profile with the average as a red line.
    positions = range(count)
    right_edge = count - 0.5
    fig = plt.figure(figsize=(21, 14))
    plt.title("Users with most followers on Quora")
    ydata = [prof.followers for prof in profiles]
    xticks = [prof.name for prof in profiles]
    plt.xlabel('Profile')
    plt.ylabel('Number of followers')
    plt.bar(positions, ydata, align='center')
    plt.xticks(positions, xticks, rotation=90)
    plt.hlines(avg_followers, -0.5, right_edge, label='Average',
               colors='red')
    plt.xlim([-0.5, right_edge])
    fig.tight_layout()
    plt.savefig('most_followed_users.png', facecolor='white',
                edgecolor='black')

    # Chart 2: per-profile ratios. Only profiles with more than 10 answers,
    # following more than 10 people and with a followers/following ratio
    # below 4000 are included (presumably to keep outliers from flattening
    # the remaining bars -- confirm).
    ff_ratio = []
    xticks = []
    fa_ratio = []
    for p in profiles:
        if p.total_answers > 10 and p.following > 10 and \
                p.followers / p.following < 4000:
            ff_ratio.append(float(p.followers) / p.following)
            fa_ratio.append(float(p.followers) / p.total_answers)
            xticks.append(p.name)
    print(len(ff_ratio))
    print(len(fa_ratio))
    if not ff_ratio:
        # No profile passed the filter: nothing to average or plot.
        return

    avg_ff = float(sum(ff_ratio)) / len(ff_ratio)
    avg_fa = float(sum(fa_ratio)) / len(fa_ratio)
    xpos_ff = range(len(ff_ratio))
    # The second bar series is shifted right so both bars of a profile sit
    # side by side; the tick labels use the same shifted positions.
    xpos_fa = [x + 0.4 for x in range(len(fa_ratio))]
    tick_pos = xpos_fa
    fig = plt.figure(figsize=(21, 14))
    plt.title("Metrics of Users with most followers on Quora")
    bar1 = plt.bar(xpos_ff, ff_ratio, 0.35, color='red')
    bar2 = plt.bar(xpos_fa, fa_ratio, 0.35, color='blue')
    l1 = plt.hlines(avg_ff, -0.5, len(ff_ratio), linestyles='dashed',
                    color='red',
                    label='Average Followers / Following ratio')
    l2 = plt.hlines(avg_fa, -0.5, len(ff_ratio), linestyles='dashed',
                    color='blue', label='Average Followers / Answers ratio')
    plt.xlabel('Users')
    plt.ylabel('Ratio Value')
    plt.xticks(tick_pos, xticks, rotation=90)
    plt.legend((bar1[0], bar2[0], l1, l2),
               ("Follower / Following Ratio", "Follower / Answer Ratio",
                "Average Followers / Following ratio",
                "Average Followers / Answers ratio"))
    plt.xlim([-0.5, len(ff_ratio)])
    fig.tight_layout()
    plt.savefig('user_metric.png', facecolor='white', edgecolor='black')
def profile_with_max_views(limit=200):
    """List and chart the most-viewed profiles.

    Selects up to *limit* ``Profile`` rows ordered by ``views`` descending
    and prints each profile's URL and view count. Two charts are saved:

    * ``views_per_follower.png`` -- views divided by followers for every
      listed profile, with the overall views/followers figure as a line.
    * ``all_time_views.png`` -- raw view counts and their average, leaving
      out the two highest-ranked profiles.

    Args:
        limit: Maximum number of profiles to include (default 200).
    """
    # Materialise the rows once so len(), indexing and slicing below all
    # work on the same plain list.
    profiles = list(
        Profile.select().order_by(Profile.views.desc()).limit(limit))
    if not profiles:
        # Nothing to list or plot.
        print("No profiles found")
        return

    total_views = 0
    total_followers = 0
    # The ranking is by views, so the header says so.
    print('Top %d Writers with most views - ' % limit)
    for profile in profiles:
        u = PROFILE_URL.format(profile.uname)
        print(u'{0} ({1})'.format(u, profile.views))
        # Each profile's views are counted exactly once; adding them twice
        # per profile doubled both average lines drawn below.
        total_views += profile.views
        total_followers += profile.followers
    # Guard against a zero follower total instead of raising.
    average_per_f = total_views / total_followers if total_followers else 0

    # Per Follower Plot
    positions = range(len(profiles))
    right_edge = len(profiles) - 0.5
    fig = plt.figure(figsize=(21, 14))
    plt.title("Views / Follower for Most Viewed Writers")
    # A profile without followers gets a zero-height bar rather than
    # aborting the whole chart with a ZeroDivisionError.
    ydata = [float(prof.views) / prof.followers if prof.followers else 0.0
             for prof in profiles]
    xticks = [prof.name for prof in profiles]
    plt.xlabel('Profile')
    plt.ylabel('Views per Follower')
    plt.bar(positions, ydata, align='center')
    plt.xticks(positions, xticks, rotation=90)
    plt.hlines(average_per_f, -0.5, right_edge, label='Average',
               colors='red')
    plt.xlim([-0.5, right_edge])
    fig.tight_layout()
    plt.savefig('views_per_follower.png', facecolor='white',
                edgecolor='black')

    # All-time views plot. The two top-ranked profiles are left out of
    # both the bars and the average (presumably because they dwarf the
    # rest -- confirm).
    remaining = profiles[2:]
    if not remaining:
        # Fewer than three profiles: nothing left to chart.
        return
    total_views -= profiles[0].views + profiles[1].views
    average_views = total_views / len(remaining)

    positions = range(len(remaining))
    right_edge = len(remaining) - 0.5
    fig = plt.figure(figsize=(21, 14))
    plt.title("All-time Views")
    ydata = [prof.views for prof in remaining]
    xticks = [prof.name for prof in remaining]
    plt.xlabel('Profile')
    plt.ylabel('All Time Answer Views')
    plt.bar(positions, ydata, align='center')
    plt.xticks(positions, xticks, rotation=90)
    plt.hlines(average_views, -0.5, right_edge, label='Average',
               colors='red')
    plt.xlim([-0.5, right_edge])
    fig.tight_layout()
    plt.savefig('all_time_views.png', facecolor='white', edgecolor='black')
def profile_with_max_answers(limit=20):
    """Report and chart the profiles that wrote the most answers.

    Up to *limit* ``Profile`` rows are taken in descending ``total_answers``
    order. Each profile's URL and answer count is printed, followed by the
    answer total, per-user and per-answer averages, and the number of listed
    users found in ``top_writers_2016.json``. Two bar charts are saved:
    ``most_answers.png`` (answers per profile) and ``view_avg_most_ans.png``
    (views per answer for each profile), each with its average drawn as a
    red horizontal line.

    Args:
        limit: Maximum number of profiles to include (default 20).
    """

    def draw_chart(title, value_of, y_label, average, average_label, target):
        # Draws one bar per profile plus the average line, then saves the
        # figure to ``target``.
        figure = plt.figure(figsize=(21, 14))
        plt.title(title)
        heights = [value_of(item) for item in profiles]
        names = [item.name for item in profiles]
        plt.xlabel('Profile')
        plt.ylabel(y_label)
        plt.bar(range(len(profiles)), heights, align='center')
        plt.xticks(range(len(profiles)), names, rotation=90)
        plt.hlines(average, -0.5, len(profiles) - 0.5,
                   label=average_label, colors='red')
        plt.xlim([-0.5, len(profiles) - 0.5])
        plt.legend()
        figure.tight_layout()
        plt.savefig(target, facecolor='white', edgecolor='black')

    ordering = Profile.total_answers.desc()
    profiles = Profile.select().order_by(ordering).limit(limit)

    # Usernames on the 2016 top-writer list, used for membership checks.
    with open('top_writers_2016.json', 'r') as handle:
        listed_writers = json.load(handle)
    top_writer_names = set(entry['uname'] for entry in listed_writers)

    answer_sum = 0
    view_sum = 0
    listed_top_writers = 0
    print('Top %d Writers with most answers - ' % limit)
    for person in profiles:
        link = PROFILE_URL.format(person.uname)
        print(u'{0} ({1} answers)'.format(link, person.total_answers))
        answer_sum += person.total_answers
        view_sum += person.views
        if person.uname in top_writer_names:
            listed_top_writers += 1

    avg_views_per_answer = view_sum / answer_sum
    avg_views_per_user = view_sum / len(profiles)
    avg_ans_per_user = answer_sum / len(profiles)

    summary = (
        ("Total number of answers is %d", answer_sum),
        ("Average number of answers per user is %d", avg_ans_per_user),
        ("Average number of views per answer is %d", avg_views_per_answer),
        ("Average all time views for a user is %d", avg_views_per_user),
        ("Number of Top Writers 2016 in this list is %d",
         listed_top_writers),
    )
    for template, value in summary:
        print(template % value)

    draw_chart(
        "Users with most answers on Quora",
        lambda item: item.total_answers,
        'Number of answers written',
        avg_ans_per_user,
        'Average Number of Answers',
        'most_answers.png')
    draw_chart(
        "Average views on each answer for users with most answers on Quora",
        lambda item: float(item.views) / item.total_answers,
        'All time views on answers / Number of answers',
        avg_views_per_answer,
        'Average Number of views / answer',
        'view_avg_most_ans.png')