def parse_answer(answer):
    """Fetch an answer page, record its credible upvoters and refresh its stats.

    Args:
        answer: answer row. Its ``question`` and ``writer_uname`` build the
            URL; ``views``, ``upvotes`` and ``last_parsed`` are overwritten
            and the row is saved.

    Raises:
        AttributeError: if the view or upvote counter cannot be found in the
            page (the row is not saved in that case).
    """
    # Fetching answer
    resp = get_page(ANSWER_URL.format(answer.question, answer.writer_uname))
    doc = BeautifulSoup(resp, 'html.parser', parse_only=ANSWER_STRAIN)

    # Get Credible Users who have upvoted this answer. find() returns None
    # when the credibility container is absent; treat that as "no users"
    # instead of failing on None.find_all().
    credibility = doc.find('div', class_=CREDIBILITY_CLASS)
    if credibility is None:
        users = []
    else:
        users = credibility.find_all('a', class_='user')
    for user in users:
        Profile.create_or_get(uname=user['href'].split('/')[2],
                              name=user.string)

    # Update answer stats (commas are stripped before the int conversion)
    answer.views = int(
        doc.find('div', class_=VIEW_ROW_CLASS).strong.string.replace(',', ''))
    answer.upvotes = int(
        doc.find('a', class_=UPVOTE_ROW_CLASS).strong.string.replace(',', ''))
    answer.last_parsed = datetime.datetime.now()
    answer.save()

    # The raw HTML of the answer is deliberately not written to disk:
    # storing answers is not feasible.

    sys.stdout.write('\rDone Parsing Answer id %d (%d)'
                     % (answer.id, len(users)))
    sys.stdout.flush()
def parse_answer(answer):
    """Fetch an answer page, record its upvoters, refresh stats, keep the HTML.

    Args:
        answer: answer row. Its ``question`` and ``writer_uname`` build the
            URL; ``views``, ``upvotes`` and ``last_parsed`` are overwritten
            and the row is saved. ``id`` names the stored HTML file.

    Raises:
        AttributeError: if the view or upvote counter cannot be found in the
            page (nothing is saved in that case).
    """
    # Fetching answer
    resp = get_page(ANSWER_URL.format(answer.question, answer.writer_uname))
    doc = BeautifulSoup(resp, 'html.parser', parse_only=ANSWER_STRAIN)

    # Get Credible Users who have upvoted this answer. find() returns None
    # when the credibility container is absent; treat that as "no users"
    # instead of failing on None.find_all().
    credibility = doc.find('div', class_=CREDIBILITY_CLASS)
    if credibility is None:
        users = []
    else:
        users = credibility.find_all('a', class_='user')
    for user in users:
        Profile.create_or_get(uname=user['href'].split('/')[2],
                              name=user.string)

    # Update answer stats (commas are stripped before the int conversion)
    answer.views = int(
        doc.find('div', class_=VIEW_ROW_CLASS).strong.string.replace(',', ''))
    answer.upvotes = int(
        doc.find('a', class_=UPVOTE_ROW_CLASS).strong.string.replace(',', ''))
    answer.last_parsed = datetime.datetime.now()
    answer.save()

    # Saving the HTML code of the answer as <id>.html in ANSWERS_FOLDER
    filename = str(answer.id) + '.html'
    with open(os.path.join(ANSWERS_FOLDER, filename), 'w+') as fstream:
        fstream.write(resp)

    sys.stdout.write('\rDone Parsing Answer id %d (%d)'
                     % (answer.id, len(users)))
    sys.stdout.flush()
def profile_with_max_views(limit=20):
    """Print the profiles with the highest view counts.

    Args:
        limit: maximum number of profiles to list.
    """
    profiles = Profile.select().order_by(Profile.views.desc()).limit(limit)
    # The ranking is by views, so the header names views (not answers).
    print("Top %d Writers with most views - " % limit)
    for profile in profiles:
        u = PROFILE_URL.format(profile.uname)
        print("{0} ({1} views)".format(u, profile.views))
def profile_with_max_followers(limit=20):
    """Print the most followed profiles, one profile URL per line.

    Args:
        limit: maximum number of profiles to list.
    """
    top_profiles = (Profile.select()
                    .order_by(Profile.followers.desc())
                    .limit(limit))
    header = "Top %d Most followed - " % limit
    print(header)
    for entry in top_profiles:
        link = PROFILE_URL.format(entry.uname)
        line = "{0} ({1} followers)".format(link, entry.followers)
        print(line)
def profile_with_max_followers(limit=20):
    """List the profiles ordered by follower count, highest first.

    Args:
        limit: maximum number of profiles to print.
    """
    ranking = Profile.select().order_by(Profile.followers.desc())
    print('Top %d Most followed - ' % limit)
    row_template = '{0} ({1} followers)'
    for ranked in ranking.limit(limit):
        print(row_template.format(PROFILE_URL.format(ranked.uname),
                                  ranked.followers))
def profile_with_max_views(limit=20):
    """Print the profiles with the highest view counts.

    Args:
        limit: maximum number of profiles to list.
    """
    profiles = Profile.select().order_by(Profile.views.desc()).limit(limit)
    # The ranking is by views, so the header names views (not answers).
    print('Top %d Writers with most views - ' % limit)
    for profile in profiles:
        u = PROFILE_URL.format(profile.uname)
        print('{0} ({1} views)'.format(u, profile.views))
def answers_with_max_upvotes(limit=20):
    """Print statistics for the most upvoted answers and plot views vs upvotes.

    Lists up to ``limit`` answers ordered by upvotes, prints totals and
    averages, reports how many of their writers appear in
    ``top_writers_2016.json`` (opened relative to the working directory)
    and saves a dot plot of views against upvotes to ``view_upvote.png``.

    Args:
        limit: maximum number of answers to analyse.
    """
    # Materialise the rows once: they are walked three times below, and the
    # averages must be taken over the rows actually returned, which can be
    # fewer than `limit`.
    answers = list(
        Answer.select().order_by(Answer.upvotes.desc()).limit(limit))
    print('Top %d Most upvoted answers - ' % limit)
    if not answers:
        # Nothing to average or plot; avoids dividing by zero below.
        print('No answers found')
        return

    total_views = 0
    total_upvotes = 0
    writer = {}
    for answer in answers:
        u = ANSWER_URL.format(answer.question,
                              answer.writer_uname).encode("UTF-8")
        # An answer without upvotes has no views-per-upvote ratio.
        ratio = answer.views / answer.upvotes if answer.upvotes else 0
        print("{0} ({1}, {2}, {3})".format(u, answer.upvotes, answer.views,
                                           ratio))
        total_upvotes += answer.upvotes
        total_views += answer.views
        writer[answer.writer_uname] = writer.get(answer.writer_uname, 0) + 1

    count = len(answers)
    print("Total Views = {0}".format(total_views))
    print("Total Upvotes = {0}".format(total_upvotes))
    print("Average Views = {0}".format(total_views / count))
    print("Average Upvotes = {0}".format(total_upvotes / count))
    avg_up = 0.0
    if total_views:
        avg_up = (float(total_upvotes) / float(total_views)) * 100
    print("On an average %.2f viewers upvoted the answer" % avg_up)

    # Writer Stat
    with open('top_writers_2016.json', 'r') as fstream:
        writer_list = json.load(fstream)
    notw = sum(1 for w in writer_list if w['uname'] in writer)
    print("{0} People on this list are Top Writers(2016)".format(notw))

    # Writers ordered by how many of the listed answers they wrote.
    sorted_writer = sorted(writer.items(), key=operator.itemgetter(1),
                           reverse=True)
    print("Total number of unique writers is {0}".format(len(sorted_writer)))
    total_followers = 0
    total_answers = 0
    for uname, _ in sorted_writer:
        profile = Profile.get(Profile.uname == uname)
        total_followers += int(profile.followers)
        total_answers += int(profile.total_answers)
    print("Average number of followers of each {0}".format(
        total_followers / len(sorted_writer)))
    print("Average number of answers written by each is {}".format(
        total_answers / len(sorted_writer)))

    # Plotting Graph
    figure = mpplt.figure(figsize=(10, 10))
    axes = figure.add_subplot(1, 1, 1)
    axes.set_title("Views vs Upvote")
    axes.plot([answer.views for answer in answers],
              [answer.upvotes for answer in answers],
              '.', color='green')
    axes.set_xlabel('Views')
    axes.ticklabel_format(style='sci', axis='x', scilimits=(0, 0))
    axes.ticklabel_format(style='sci', axis='y', scilimits=(0, 0))
    axes.set_xlim([0, 1500000])
    axes.set_ylim([10000, 25000])
    axes.set_ylabel('Upvotes')
    figure.savefig('view_upvote.png', facecolor='white', edgecolor='black')
def answers_with_max_upvotes(limit=20):
    """Print statistics for the most upvoted answers and plot views vs upvotes.

    Lists up to ``limit`` answers ordered by upvotes, prints totals and
    averages, reports how many of their writers appear in
    ``top_writers_2016.json`` (opened relative to the working directory)
    and saves a dot plot of views against upvotes to ``view_upvote.png``.

    Args:
        limit: maximum number of answers to analyse.
    """
    # Run the query once and keep the rows: they are iterated three times,
    # and the averages are per returned row (possibly fewer than `limit`).
    answers = list(
        Answer.select().order_by(Answer.upvotes.desc()).limit(limit))
    print("Top %d Most upvoted answers - " % limit)
    if not answers:
        # Nothing to average or plot; avoids dividing by zero below.
        print("No answers found")
        return

    total_views = 0
    total_upvotes = 0
    writer = {}
    for answer in answers:
        u = ANSWER_URL.format(answer.question,
                              answer.writer_uname).encode("UTF-8")
        # An answer without upvotes has no views-per-upvote ratio.
        ratio = answer.views / answer.upvotes if answer.upvotes else 0
        print("{0} ({1}, {2}, {3})".format(u, answer.upvotes, answer.views,
                                           ratio))
        total_upvotes += answer.upvotes
        total_views += answer.views
        writer[answer.writer_uname] = writer.get(answer.writer_uname, 0) + 1

    count = len(answers)
    print("Total Views = {0}".format(total_views))
    print("Total Upvotes = {0}".format(total_upvotes))
    print("Average Views = {0}".format(total_views / count))
    print("Average Upvotes = {0}".format(total_upvotes / count))
    avg_up = 0.0
    if total_views:
        avg_up = (float(total_upvotes) / float(total_views)) * 100
    print("On an average %.2f viewers upvoted the answer" % avg_up)

    # Writer Stat
    with open("top_writers_2016.json", "r") as fstream:
        writer_list = json.load(fstream)
    notw = sum(1 for w in writer_list if w["uname"] in writer)
    print("{0} People on this list are Top Writers(2016)".format(notw))

    # Writers ordered by how many of the listed answers they wrote.
    sorted_writer = sorted(writer.items(), key=operator.itemgetter(1),
                           reverse=True)
    print("Total number of unique writers is {0}".format(len(sorted_writer)))
    total_followers = 0
    total_answers = 0
    for uname, _ in sorted_writer:
        profile = Profile.get(Profile.uname == uname)
        total_followers += int(profile.followers)
        total_answers += int(profile.total_answers)
    print("Average number of followers of each {0}".format(
        total_followers / len(sorted_writer)))
    print("Average number of answers written by each is {}".format(
        total_answers / len(sorted_writer)))

    # Plotting Graph
    figure = mpplt.figure(figsize=(10, 10))
    axes = figure.add_subplot(1, 1, 1)
    axes.set_title("Views vs Upvote")
    axes.plot([answer.views for answer in answers],
              [answer.upvotes for answer in answers],
              ".", color="green")
    axes.set_xlabel("Views")
    axes.ticklabel_format(style="sci", axis="x", scilimits=(0, 0))
    axes.ticklabel_format(style="sci", axis="y", scilimits=(0, 0))
    axes.set_xlim([0, 1500000])
    axes.set_ylim([10000, 25000])
    axes.set_ylabel("Upvotes")
    figure.savefig("view_upvote.png", facecolor="white", edgecolor="black")
def _registration_error(email, username, password, confirm):
    """Return the first problem with a sign-up request, or "" if it is valid."""
    if not validate_email(email):
        return "Invalid Email Entered"
    if Profile.already_exists(email=email):
        return "Email Already Exists"
    if not validate_username(username):
        return "Invalid Username Entered"
    if Profile.already_exists(username=username):
        return "Username Already Exists"
    if password != confirm or password == "":
        return "Invalid Passwords Entered"
    return ""


def _login_error(email, password):
    """Return why the credentials are rejected, or "" when they match."""
    if email is None or password is None:
        return "Fill in all blanks"
    if not Profile.already_exists(email=email):
        return "Incorrect Email"
    expected_pass = Profile.get_password_hash(email)
    # The stored value is split on "$" into salt and hash; the salt is
    # reused to hash the submitted password for comparison.
    salt, _ = str(expected_pass).split("$")
    # NOTE(review): plain == comparison of hashes; consider
    # hmac.compare_digest once the hash types are confirmed.
    if expected_pass != str(hash_password(password, salt)):
        return "Incorrect Email or Password"
    return ""


def gateway():
    """Serve the combined sign-up / login page.

    A logged-in visitor is redirected to the index. A POST carrying all of
    ``email``, ``username``, ``password`` and ``confirm`` is handled as a
    registration; a POST lacking any of them is handled as a login attempt.
    Any other request, or a failed attempt, renders ``gateway.html`` with
    the error message (if any).

    Returns:
        The value of ``redirect(...)`` or ``render_template(...)``.
    """
    if is_logged_in():
        return redirect(url_for("index"))

    err = ""
    if request.method == "POST":
        form = request.form
        try:
            # Only the field lookups are guarded: a missing field selects
            # the login path, while real errors raised further down are no
            # longer swallowed and misread as a login attempt.
            raw_email, raw_username, raw_password, raw_confirm = [
                form[key]
                for key in ("email", "username", "password", "confirm")
            ]
        except KeyError:
            email = fix(form["email"], r"@\.")
            password = fix(form["password"])
            err = _login_error(email, password)
            if not err:
                session["email"] = email
                session["username"] = Profile.get("username", email=email)
                return redirect(url_for("index"))
        else:
            email = fix(raw_email, r"@\.")
            username = fix(raw_username)
            password = fix(raw_password)
            confirm = fix(raw_confirm)
            err = _registration_error(email, username, password, confirm)
            if not err:
                Profile.insert_profile(email, username,
                                       hash_password(password))
                return render_template("gateway.html", title="Gateway")

    reg = request.args.get("reg") == "true"
    return render_template("gateway.html", title="Gateway", indent=False,
                           reg=reg, err=err)
def profile_with_max_edits(limit=20):
    """Print and chart the profiles with the most edits.

    Lists up to ``limit`` profiles ordered by ``edits``, prints the total
    and average edit count and how many of the profiles appear in
    ``top_writers_2016.json`` (opened relative to the working directory),
    then saves a bar chart of the edit counts.

    Args:
        limit: maximum number of profiles to analyse.
    """
    # Keep the rows in a list: they are iterated several times and their
    # count is the divisor of the average.
    profiles = list(
        Profile.select().order_by(Profile.edits.desc()).limit(limit))
    if not profiles:
        # Nothing to average or plot; avoids dividing by zero below.
        print("No profiles found")
        return

    # Top Writer
    with open('top_writers_2016.json', 'r') as fstream:
        writer_list = json.load(fstream)
    tw = set(writer['uname'] for writer in writer_list)

    top_writer_count = 0
    total_edits = 0
    for profile in profiles:
        u = PROFILE_URL.format(profile.uname)
        # Report the same column the ranking and the chart use (edits).
        print(u'{0} ({1} edits)'.format(u, profile.edits))
        total_edits += profile.edits
        if profile.uname in tw:
            top_writer_count += 1
    avg_edits = total_edits / len(profiles)
    print("Total number of edits is %d" % total_edits)
    print("Average number of edits by a single user is %d" % avg_edits)
    print("Number on Top Writers 2016 on this list is %d" % top_writer_count)

    # Visualisation
    fig = plt.figure(figsize=(21, 14))
    plt.title("Users with Most Edits on Quora")
    ydata = [prof.edits for prof in profiles]
    xticks = [prof.name for prof in profiles]
    plt.xlabel('User')
    plt.ylabel('Edits Made by User')
    plt.bar(range(len(profiles)), ydata, align='center')
    plt.xticks(range(len(profiles)), xticks, rotation=90)
    plt.hlines(avg_edits, -0.5, len(profiles) - 0.5, label='Average',
               colors='red')
    plt.xlim([-0.5, len(profiles) - 0.5])
    fig.tight_layout()
    # NOTE(review): the output name still says "posts"; kept so existing
    # consumers of the file keep working - confirm whether it should be
    # renamed.
    plt.savefig('most_posts.png', facecolor='white', edgecolor='black')
action='store_true', help='Do not Crawl Profiles') parser.add_argument('--no_answer', action='store_true', help='Do not Crawl Answers') args = parser.parse_args() # Filling Database with Top Writers 2016 with open('top_writers_2016.json', 'r') as fstream: writer_list = json.load(fstream) with open('other_writers.json', 'r') as fstream: writer_list += json.load(fstream) create_directory(ANSWERS_FOLDER) create_directory(PROFILE_FOLDER) for writer in writer_list: new = Profile.create_or_get(uname=writer['uname'], name=writer['name'])[1] if new: print(u'New Profile %s Created' % writer['uname']) #print "Number of Writers Added = ", len(writer_list) # Starting to Crawl total_parsing = 0 max_crawl = args.max_crawl while total_parsing < max_crawl: if not args.no_profile: # Parse Old Profiles old_time = datetime.datetime.now() - datetime.timedelta(days=7) old_profiles = Profile.select().where( Profile.last_parsed <= old_time).limit(max_crawl - total_parsing) total_parsing += len(old_profiles) print "Number of Profiles to Crawl - ", len(old_profiles)
help='Number of maximum requests to make') parser.add_argument('--no_profile', action='store_true', help='Do not Crawl Profiles') parser.add_argument('--no_answer', action='store_true', help='Do not Crawl Answers') args = parser.parse_args() # Filling Database with Top Writers 2016 with open('top_writers_2016.json', 'r') as fstream: writer_list = json.load(fstream) with open('other_writers.json', 'r') as fstream: writer_list += json.load(fstream) create_directory(ANSWERS_FOLDER) create_directory(PROFILE_FOLDER) for writer in writer_list: new = Profile.create_or_get(uname=writer['uname'], name=writer['name'])[1] if new: print(u'New Profile %s Created' % writer['uname']) #print "Number of Writers Added = ", len(writer_list) # Starting to Crawl total_parsing = 0 max_crawl = args.max_crawl while total_parsing < max_crawl: if not args.no_profile: # Parse Old Profiles old_time = datetime.datetime.now() - datetime.timedelta(days=7) old_profiles = Profile.select().where( Profile.last_parsed <= old_time).limit(max_crawl - total_parsing) total_parsing += len(old_profiles) print "Number of Profiles to Crawl - ", len(old_profiles) for profile in old_profiles:
def profile_with_max_views(limit=200):
    """Print the most viewed profiles and chart their view statistics.

    Lists up to ``limit`` profiles ordered by ``views`` and saves two bar
    charts: views per follower (``views_per_follower.png``) and all-time
    views (``all_time_views.png``). The second chart leaves out the two
    most viewed profiles.

    Args:
        limit: maximum number of profiles to analyse.
    """
    # Keep the rows in a list: they are iterated, indexed and sliced below.
    profiles = list(
        Profile.select().order_by(Profile.views.desc()).limit(limit))

    # The ranking is by views, so the header names views (not answers).
    print('Top %d Writers with most views - ' % limit)
    total_views = 0
    total_followers = 0
    for profile in profiles:
        u = PROFILE_URL.format(profile.uname)
        print(u'{0} ({1})'.format(u, profile.views))
        # Each profile is counted exactly once; counting it twice would
        # double both averages drawn on the charts.
        total_views += profile.views
        total_followers += profile.followers

    if not profiles:
        # Nothing to plot; avoids dividing by zero below.
        print('No profiles found')
        return

    def bar_chart(rows, values, average, title, ylabel, filename):
        """Save a bar chart of `values`, one bar per row, with an average line."""
        fig = plt.figure(figsize=(21, 14))
        plt.title(title)
        plt.xlabel('Profile')
        plt.ylabel(ylabel)
        plt.bar(range(len(rows)), values, align='center')
        plt.xticks(range(len(rows)), [row.name for row in rows], rotation=90)
        plt.hlines(average, -0.5, len(rows) - 0.5, label='Average',
                   colors='red')
        plt.xlim([-0.5, len(rows) - 0.5])
        fig.tight_layout()
        plt.savefig(filename, facecolor='white', edgecolor='black')

    # Per Follower Plot. The average is a float like the bars it is drawn
    # over, so the line is not truncated by integer division.
    average_per_f = float(total_views) / total_followers
    bar_chart(profiles,
              [float(prof.views) / prof.followers for prof in profiles],
              average_per_f,
              "Views / Follower for Most Viewed Writers",
              'Views per Follower',
              'views_per_follower.png')

    # All-time views, without the two most viewed profiles (presumably
    # outliers that would flatten the rest of the chart - confirm).
    rest = profiles[2:]
    if not rest:
        return
    rest_views = sum(prof.views for prof in rest)
    average_views = rest_views / len(rest)
    bar_chart(rest,
              [prof.views for prof in rest],
              average_views,
              "All-time Views",
              'All Time Answer Views',
              'all_time_views.png')
def profile_with_max_answers(limit=20):
    """Print and chart the profiles that have written the most answers.

    Lists up to ``limit`` profiles ordered by ``total_answers``, prints
    totals and averages, counts how many of them appear in
    ``top_writers_2016.json`` and saves two bar charts
    (``most_answers.png`` and ``view_avg_most_ans.png``).

    Args:
        limit: maximum number of profiles to analyse.
    """
    profiles = Profile.select().order_by(
        Profile.total_answers.desc()).limit(limit)

    # Top Writer lookup table, keyed by user name.
    with open('top_writers_2016.json', 'r') as fstream:
        writer_list = json.load(fstream)
    tw = dict.fromkeys((entry['uname'] for entry in writer_list), 0)

    answers_sum = 0
    views_sum = 0
    top_writer_count = 0
    print('Top %d Writers with most answers - ' % limit)
    for row in profiles:
        link = PROFILE_URL.format(row.uname)
        print(u'{0} ({1} answers)'.format(link, row.total_answers))
        answers_sum += row.total_answers
        views_sum += row.views
        if row.uname in tw:
            top_writer_count += 1

    avg_views_per_answer = views_sum / answers_sum
    how_many = len(profiles)
    avg_views_per_user = views_sum / how_many
    avg_ans_per_user = answers_sum / how_many

    for message, value in (
            ("Total number of answers is %d", answers_sum),
            ("Average number of answers per user is %d", avg_ans_per_user),
            ("Average number of views per answer is %d",
             avg_views_per_answer),
            ("Average all time views for a user is %d", avg_views_per_user),
            ("Number of Top Writers 2016 in this list is %d",
             top_writer_count)):
        print(message % value)

    positions = range(how_many)
    right_edge = how_many - 0.5

    def bar_chart(title, value_of, ylabel, average, average_label, outfile):
        """Draw one bar per profile plus a red average line and save it."""
        fig = plt.figure(figsize=(21, 14))
        plt.title(title)
        heights = [value_of(prof) for prof in profiles]
        names = [prof.name for prof in profiles]
        plt.xlabel('Profile')
        plt.ylabel(ylabel)
        plt.bar(positions, heights, align='center')
        plt.xticks(positions, names, rotation=90)
        plt.hlines(average, -0.5, right_edge, label=average_label,
                   colors='red')
        plt.xlim([-0.5, right_edge])
        plt.legend()
        fig.tight_layout()
        plt.savefig(outfile, facecolor='white', edgecolor='black')

    bar_chart("Users with most answers on Quora",
              lambda prof: prof.total_answers,
              'Number of answers written',
              avg_ans_per_user,
              'Average Number of Answers',
              'most_answers.png')
    bar_chart("Average views on each answer for users with most answers "
              "on Quora",
              lambda prof: float(prof.views) / prof.total_answers,
              'All time views on answers / Number of answers',
              avg_views_per_answer,
              'Average Number of views / answer',
              'view_avg_most_ans.png')
def answers_with_max_upvotes(limit=20):
    """Print statistics for the most upvoted answers and plot views vs upvotes.

    Lists up to ``limit`` answers ordered by upvotes (announcing each new
    running maximum of views), prints totals and averages, reports how
    many of their writers appear in ``top_writers_2016.json`` (opened
    relative to the working directory) and saves a dot plot of views
    against upvotes to ``view_upvote.png``.

    Args:
        limit: maximum number of answers to analyse.
    """
    # Run the query once and keep the rows: they are iterated three times,
    # and the averages are per returned row (possibly fewer than `limit`).
    answers = list(
        Answer.select().order_by(Answer.upvotes.desc()).limit(limit))
    print('Top %d Most upvoted answers - ' % limit)
    if not answers:
        # Nothing to average or plot; avoids dividing by zero below.
        print('No answers found')
        return

    total_views = 0
    total_upvotes = 0
    writer = {}
    max_views = 0
    for answer in answers:
        u = ANSWER_URL.format(answer.question,
                              answer.writer_uname).encode("UTF-8")
        # An answer without upvotes has no views-per-upvote ratio.
        ratio = answer.views / answer.upvotes if answer.upvotes else 0
        print("{0} ({1}, {2}, {3})".format(u, answer.upvotes, answer.views,
                                           ratio))
        total_upvotes += answer.upvotes
        total_views += answer.views
        writer[answer.writer_uname] = writer.get(answer.writer_uname, 0) + 1
        if answer.views > max_views:
            max_views = answer.views
            # Call form of print (as everywhere else in this function);
            # the text matches what the print statement used to emit.
            print("Max -  {0} {1}".format(u, answer.views))

    count = len(answers)
    print("Total Views = {0}".format(total_views))
    print("Total Upvotes = {0}".format(total_upvotes))
    print("Average Views = {0}".format(total_views / count))
    print("Average Upvotes = {0}".format(total_upvotes / count))
    avg_up = 0.0
    if total_views:
        avg_up = (float(total_upvotes) / float(total_views)) * 100
    print("On an average %.2f viewers upvoted the answer" % avg_up)

    # Writer Stat
    with open('top_writers_2016.json', 'r') as fstream:
        writer_list = json.load(fstream)
    notw = sum(1 for w in writer_list if w['uname'] in writer)
    print("{0} People on this list are Top Writers(2016)".format(notw))

    # Writers ordered by how many of the listed answers they wrote.
    sorted_writer = sorted(writer.items(), key=operator.itemgetter(1),
                           reverse=True)
    print(sorted_writer[:10])
    print("Total number of unique writers is {0}".format(len(sorted_writer)))
    total_followers = 0
    total_answers = 0
    for uname, _ in sorted_writer:
        profile = Profile.get(Profile.uname == uname)
        total_followers += int(profile.followers)
        total_answers += int(profile.total_answers)
    print("Average number of followers of each {0}".format(
        total_followers / len(sorted_writer)))
    print("Average number of answers written by each is {}".format(
        total_answers / len(sorted_writer)))

    # Plotting Graph
    figure = plt.figure(figsize=(10, 10))
    splt = figure.add_subplot(1, 1, 1)
    splt.set_title("Views vs Upvote")
    splt.plot([answer.views for answer in answers],
              [answer.upvotes for answer in answers],
              '.', color='green')
    splt.set_xlabel('Views')
    splt.ticklabel_format(style='sci', axis='x', scilimits=(0, 0))
    splt.ticklabel_format(style='sci', axis='y', scilimits=(0, 0))
    splt.set_xlim([0, 1500000])
    splt.set_ylim([10000, 25000])
    splt.set_ylabel('Upvotes')
    figure.tight_layout()
    figure.savefig('view_upvote.png', facecolor='white', edgecolor='black')
def profile(username):
    """Render the profile page for `username`.

    Redirects to the index page when no profile exists for the name.
    """
    if Profile.already_exists(username=username):
        page_title = "%s's Profile" % username
        return render_template("profile.html", title=page_title,
                               username=username)
    return redirect(url_for("index"))
"https://s3.amazonaws.com/uifaces/faces/twitter/langate/128.jpg", "https://s3.amazonaws.com/uifaces/faces/twitter/anoff/128.jpg", "https://s3.amazonaws.com/uifaces/faces/twitter/osvaldas/128.jpg", "https://s3.amazonaws.com/uifaces/faces/twitter/jayrobinson/128.jpg", "https://s3.amazonaws.com/uifaces/faces/twitter/jm_denis/128.jpg", "https://s3.amazonaws.com/uifaces/faces/twitter/brandclay/128.jpg", "https://s3.amazonaws.com/uifaces/faces/twitter/VinThomas/128.jpg", "https://s3.amazonaws.com/uifaces/faces/twitter/_victa/128.jpg", "https://s3.amazonaws.com/uifaces/faces/twitter/saschamt/128.jpg", "https://s3.amazonaws.com/uifaces/faces/twitter/victorerixon/128.jpg", "https://s3.amazonaws.com/uifaces/faces/twitter/noxdzine/128.jpg", "https://s3.amazonaws.com/uifaces/faces/twitter/yalozhkin/128.jpg", "https://s3.amazonaws.com/uifaces/faces/twitter/flame_kaizar/128.jpg", "https://s3.amazonaws.com/uifaces/faces/twitter/baliomega/128.jpg", "https://s3.amazonaws.com/uifaces/faces/twitter/terryxlife/128.jpg" ] for j in "abcdefghijklmnoprst": for i, name in enumerate(NAMES): email = name.lower().replace(" ", ".") + ("*****@*****.**" % (j)) face = FACES[i] user = User(email=email, active=True, display_name=name, photo_url=face) profile = Profile(external_key=random_address_hash(), data_json="{}") user.profiles.append(profile) db.session.add(user) db.session.commit() print "Added %s" % (name)
def profile_with_max_followers(limit=20):
    """Print and chart statistics for the most followed profiles.

    Lists up to ``limit`` profiles ordered by ``followers``, prints totals
    and averages, counts how many of them appear in
    ``top_writers_2016.json`` (opened relative to the working directory)
    and saves two charts: follower counts (``most_followed_users.png``)
    and follower/following and follower/answer ratios
    (``user_metric.png``).

    Args:
        limit: maximum number of profiles to analyse.
    """
    # Keep the rows in a list: they are iterated several times and their
    # count is the divisor of the averages.
    profiles = list(
        Profile.select().order_by(Profile.followers.desc()).limit(limit))
    total_views = 0
    total_followers = 0
    total_following = 0
    total_answers = 0

    # Top Writer
    with open('top_writers_2016.json', 'r') as fstream:
        writer_list = json.load(fstream)
    tw = set(writer['uname'] for writer in writer_list)

    top_writer_count = 0
    print('Top %d Most followed - ' % limit)
    for profile in profiles:
        u = PROFILE_URL.format(profile.uname)
        print(u'{0} ({1} followers)'.format(u, profile.followers))
        # Each profile is counted exactly once; counting views twice would
        # double the printed total and the view averages.
        total_views += profile.views
        total_followers += profile.followers
        total_following += profile.following
        total_answers += profile.total_answers
        if profile.uname in tw:
            top_writer_count += 1

    if not profiles:
        # Nothing to average or plot; avoids dividing by zero below.
        print('No profiles found')
        return

    avg_followers = total_followers / len(profiles)
    avg_following = total_following / len(profiles)
    avg_answers = total_answers / len(profiles)
    avg_views = total_views / len(profiles)
    avg_v_gain_f = total_views / total_followers
    avg_f_gain_ans = total_followers / total_answers
    print("Total number of all-time views is %d" % total_views)
    print("Average number of all-time views is %d" % avg_views)
    print("Average number of followers is %d" % avg_followers)
    print("Average number of views gained per follower is %d" % avg_v_gain_f)
    print("Total number of answers written by them is %d" % total_answers)
    print("Average number of answers written by each is %d" % avg_answers)
    print("Average number of people these writers follow is %d"
          % avg_following)
    print("Average number of followers gained per answer is %d"
          % avg_f_gain_ans)
    print("Number on Top Writers 2016 is %d" % top_writer_count)

    fig = plt.figure(figsize=(21, 14))
    plt.title("Users with most followers on Quora")
    ydata = [prof.followers for prof in profiles]
    xticks = [prof.name for prof in profiles]
    plt.xlabel('Profile')
    plt.ylabel('Number of followers')
    plt.bar(range(len(profiles)), ydata, align='center')
    plt.xticks(range(len(profiles)), xticks, rotation=90)
    plt.hlines(avg_followers, -0.5, len(profiles) - 0.5, label='Average',
               colors='red')
    plt.xlim([-0.5, len(profiles) - 0.5])
    fig.tight_layout()
    plt.savefig('most_followed_users.png', facecolor='white',
                edgecolor='black')

    # Ratio chart: only profiles with more than 10 answers, following more
    # than 10 users and a followers/following ratio below 4000 are drawn.
    ff_ratio = []
    xticks = []
    fa_ratio = []
    for p in profiles:
        if p.total_answers > 10 and p.following > 10 and \
                p.followers / p.following < 4000:
            ff_ratio.append(float(p.followers) / p.following)
            fa_ratio.append(float(p.followers) / p.total_answers)
            xticks.append(p.name)
    print(len(ff_ratio))
    print(len(fa_ratio))
    if not ff_ratio:
        # No profile passed the filter: nothing to average or draw.
        return

    avg_ff = float(sum(ff_ratio)) / len(ff_ratio)
    avg_fa = float(sum(fa_ratio)) / len(fa_ratio)
    xpos_ff = range(len(ff_ratio))
    # The second bar series and the tick labels are shifted by 0.4.
    xpos_fa = [x + 0.4 for x in range(len(fa_ratio))]
    tick_pos = [x + 0.4 for x in range(len(fa_ratio))]

    fig = plt.figure(figsize=(21, 14))
    plt.title("Metrics of Users with most followers on Quora")
    bar1 = plt.bar(xpos_ff, ff_ratio, 0.35, color='red')
    bar2 = plt.bar(xpos_fa, fa_ratio, 0.35, color='blue')
    l1 = plt.hlines(avg_ff, -0.5, len(ff_ratio), linestyles='dashed',
                    color='red',
                    label='Average Followers / Following ratio')
    l2 = plt.hlines(avg_fa, -0.5, len(ff_ratio), linestyles='dashed',
                    color='blue',
                    label='Average Followers / Answers ratio')
    plt.xlabel('Users')
    plt.ylabel('Ratio Value')
    plt.xticks(tick_pos, xticks, rotation=90)
    plt.legend((bar1[0], bar2[0], l1, l2),
               ("Follower / Following Ratio",
                "Follower / Answer Ratio",
                "Average Followers / Following ratio",
                "Average Followers / Answers ratio"))
    plt.xlim([-0.5, len(ff_ratio)])
    fig.tight_layout()
    plt.savefig('user_metric.png', facecolor='white', edgecolor='black')