def profile_with_max_views(limit=20):
    """Print the profiles with the highest view counts.

    Selects at most ``limit`` profiles ordered by ``Profile.views``
    descending and prints one line per profile: the URL built from
    ``PROFILE_URL`` and the profile's ``views`` value.

    Args:
        limit: maximum number of profiles to list (default 20).
    """
    profiles = Profile.select().order_by(Profile.views.desc()).limit(limit)

    # The header names the metric the query is actually ranked by (views).
    print("Top %d Writers with most views - " % limit)
    for profile in profiles:
        u = PROFILE_URL.format(profile.uname)
        print("{0} ({1} views)".format(u, profile.views))
Beispiel #2
0
def profile_with_max_followers(limit=20):
  """List the most-followed profiles on standard output.

  At most ``limit`` profiles are selected, ordered by ``Profile.followers``
  descending; each is printed as its ``PROFILE_URL`` link followed by the
  follower count.
  """
  query = Profile.select()
  query = query.order_by(Profile.followers.desc())
  top_profiles = query.limit(limit)

  header = 'Top %d Most followed - ' % limit
  print(header)

  line_template = '{0} ({1} followers)'
  for entry in top_profiles:
    link = PROFILE_URL.format(entry.uname)
    print(line_template.format(link, entry.followers))
def profile_with_max_followers(limit=20):
    """Print a ranking of profiles by follower count.

    Reads up to ``limit`` rows from ``Profile`` sorted by ``followers`` in
    descending order and writes one "<url> (<n> followers)" line for each.
    """
    by_followers = Profile.select().order_by(Profile.followers.desc())
    ranked = by_followers.limit(limit)

    print("Top %d Most followed - " % limit)
    for row in ranked:
        print("{0} ({1} followers)".format(
            PROFILE_URL.format(row.uname), row.followers))
Beispiel #4
0
def profile_with_max_views(limit=20):
  """Print the profiles with the highest view counts.

  Selects at most ``limit`` profiles ordered by ``Profile.views``
  descending and prints one line per profile: the URL built from
  ``PROFILE_URL`` and the profile's ``views`` value.

  Args:
    limit: maximum number of profiles to list (default 20).
  """
  profiles = Profile.select().order_by(Profile.views.desc()).limit(limit)

  # The header names the metric the query is actually ranked by (views).
  print('Top %d Writers with most views - ' % limit)
  for profile in profiles:
    u = PROFILE_URL.format(profile.uname)
    print('{0} ({1} views)'.format(u, profile.views))
def profile_with_max_edits(limit=20):
  """Report and chart the profiles with the most edits.

  Prints one line per profile (URL and edit count), then the total and
  average edit counts and how many of the listed users appear in
  ``top_writers_2016.json``. A bar chart of the edit counts, with the
  average drawn as a red horizontal line, is saved to ``most_posts.png``.

  Nothing is printed or plotted when the query returns no profiles.

  Args:
    limit: maximum number of profiles to include (default 20).
  """
  # Materialise once: the rows are iterated, counted and plotted below.
  profiles = list(
    Profile.select().order_by(Profile.edits.desc()).limit(limit))

  # Usernames listed in the Top Writers file, kept for membership tests.
  with open('top_writers_2016.json', 'r') as fstream:
    top_writers = set(writer['uname'] for writer in json.load(fstream))

  if not profiles:
    # An empty result has no average to compute and nothing to plot.
    return

  top_writer_count = 0
  total_edits = 0
  for profile in profiles:
    u = PROFILE_URL.format(profile.uname)
    # Report and sum the same field the query is ranked by and the chart
    # plots (edits), not the unrelated post count.
    print(u'{0} ({1} edits)'.format(u, profile.edits))
    total_edits += profile.edits
    if profile.uname in top_writers:
      top_writer_count += 1

  avg_edits = total_edits / len(profiles)
  print("Total number of edits is %d" % total_edits)
  print("Average number of edits by a single user is %d" % avg_edits)
  print("Number on Top Writers 2016 on this list is %d" % top_writer_count)

  # Visualisation
  positions = range(len(profiles))
  fig = plt.figure(figsize=(21, 14))
  plt.title("Users with Most Edits on Quora")
  ydata = [prof.edits for prof in profiles]
  xticks = [prof.name for prof in profiles]
  plt.xlabel('User')
  plt.ylabel('Edits Made by User')
  plt.bar(positions, ydata, align='center')
  plt.xticks(positions, xticks, rotation=90)
  plt.hlines(avg_edits, -0.5, len(profiles) - 0.5, label='Average',
             colors='red')
  plt.xlim([-0.5, len(profiles) - 0.5])
  fig.tight_layout()
  # NOTE(review): the file name says "posts" although the chart shows
  # edits; kept unchanged so anything consuming this path still finds it.
  plt.savefig('most_posts.png', facecolor='white', edgecolor='black')
    create_directory(PROFILE_FOLDER)
    for writer in writer_list:
        new = Profile.create_or_get(uname=writer['uname'],
                                    name=writer['name'])[1]
        if new: print(u'New Profile %s Created' % writer['uname'])
    #print "Number of Writers Added = ", len(writer_list)

    # Starting to Crawl
    total_parsing = 0
    max_crawl = args.max_crawl
    while total_parsing < max_crawl:
        if not args.no_profile:
            # Parse Old Profiles
            old_time = datetime.datetime.now() - datetime.timedelta(days=7)
            old_profiles = Profile.select().where(
                Profile.last_parsed <= old_time).limit(max_crawl -
                                                       total_parsing)
            total_parsing += len(old_profiles)
            print "Number of Profiles to Crawl - ", len(old_profiles)
            for profile in old_profiles:
                try:
                    parse_profile(profile)
                except Exception, err:  #pylint-diasble:
                    print err
                    traceback.print_exc(file=sys.stdout)
                    pass
            if total_parsing >= max_crawl: break

        if not args.no_answer:
            # Parse Old Answers
            old_time = datetime.datetime.now() - datetime.timedelta(days=7)
    writer_list += json.load(fstream)
  create_directory(ANSWERS_FOLDER)
  create_directory(PROFILE_FOLDER)
  for writer in writer_list:
    new = Profile.create_or_get(uname=writer['uname'], name=writer['name'])[1]
    if new: print(u'New Profile %s Created' % writer['uname'])
  #print "Number of Writers Added = ", len(writer_list)

  # Starting to Crawl
  total_parsing = 0
  max_crawl = args.max_crawl
  while total_parsing < max_crawl:
    if not args.no_profile:
      # Parse Old Profiles
      old_time = datetime.datetime.now() - datetime.timedelta(days=7)
      old_profiles = Profile.select().where(
        Profile.last_parsed <= old_time).limit(max_crawl - total_parsing)
      total_parsing += len(old_profiles)
      print "Number of Profiles to Crawl - ", len(old_profiles)
      for profile in old_profiles:
        try:
          parse_profile(profile)
        except Exception, err: #pylint-diasble:
          print err
          traceback.print_exc(file=sys.stdout)
          pass
      if total_parsing >= max_crawl: break

    if not args.no_answer:
      # Parse Old Answers
      old_time = datetime.datetime.now() - datetime.timedelta(days=7)
      old_answers = Answer.select().where(
def profile_with_max_followers(limit=20):
  """Report and chart statistics for the most-followed profiles.

  Prints the ranked list (URL and follower count), followed by totals and
  averages of views, followers, following and answers over the listed
  profiles, and how many of them appear in ``top_writers_2016.json``.

  Two charts are written:
    * ``most_followed_users.png`` - follower count per profile with the
      average drawn as a red horizontal line.
    * ``user_metric.png`` - followers/following and followers/answers
      ratios for the profiles that pass the filter below, with dashed
      lines at the two averages. Skipped when no profile passes.

  Only the header line is printed when the query returns no profiles.

  Args:
    limit: maximum number of profiles to include (default 20).
  """
  # Materialise once: the rows are iterated, counted and plotted below.
  profiles = list(
    Profile.select().order_by(Profile.followers.desc()).limit(limit))

  # Usernames listed in the Top Writers file, kept for membership tests.
  with open('top_writers_2016.json', 'r') as fstream:
    top_writers = set(writer['uname'] for writer in json.load(fstream))

  total_views = 0
  total_followers = 0
  total_following = 0
  total_answers = 0
  top_writer_count = 0

  print('Top %d Most followed - ' % limit)
  for profile in profiles:
    u = PROFILE_URL.format(profile.uname)
    print(u'{0} ({1} followers)'.format(u, profile.followers))
    # Each metric is added exactly once per profile; adding views twice
    # would double the reported view total and the averages built on it.
    total_views += profile.views
    total_followers += profile.followers
    total_following += profile.following
    total_answers += profile.total_answers
    if profile.uname in top_writers:
      top_writer_count += 1

  if not profiles:
    # An empty result has no averages to compute and nothing to plot.
    return

  count = len(profiles)
  avg_followers = total_followers / count
  avg_following = total_following / count
  avg_answers = total_answers / count
  avg_views = total_views / count
  # Totals can legitimately be zero; report 0 instead of dividing by it.
  avg_v_gain_f = total_views / total_followers if total_followers else 0
  avg_f_gain_ans = total_followers / total_answers if total_answers else 0

  print("Total number of all-time views is %d" % total_views)
  print("Average number of all-time views is %d" % avg_views)
  print("Average number of followers is %d" % avg_followers)
  print("Average number of views gained per follower is %d" % avg_v_gain_f)
  print("Total number of answers written by them is %d" % total_answers)
  print("Average number of answers written by each is %d" % avg_answers)
  print("Average number of people these writers follow is %d" % avg_following)
  print("Average number of followers gained per answer is %d" % avg_f_gain_ans)
  print("Number on Top Writers 2016 is %d" % top_writer_count)

  # Chart 1: follower count per profile.
  positions = range(count)
  fig = plt.figure(figsize=(21, 14))
  plt.title("Users with most followers on Quora")
  ydata = [prof.followers for prof in profiles]
  xticks = [prof.name for prof in profiles]
  plt.xlabel('Profile')
  plt.ylabel('Number of followers')
  plt.bar(positions, ydata, align='center')
  plt.xticks(positions, xticks, rotation=90)
  plt.hlines(avg_followers, -0.5, count - 0.5, label='Average',
             colors='red')
  plt.xlim([-0.5, count - 0.5])
  fig.tight_layout()
  plt.savefig('most_followed_users.png', facecolor='white', edgecolor='black')

  # Chart 2: per-profile ratios. Only profiles with more than 10 answers,
  # following more than 10 people and a followers/following ratio below
  # 4000 are kept; the first two conditions also keep both divisions safe.
  ff_ratio = []
  xticks = []
  fa_ratio = []
  for p in profiles:
    if p.total_answers > 10 and p.following > 10 and \
      p.followers / p.following < 4000:
      ff_ratio.append(float(p.followers) / p.following)
      fa_ratio.append(float(p.followers) / p.total_answers)
      xticks.append(p.name)

  print(len(ff_ratio))
  print(len(fa_ratio))
  if not ff_ratio:
    # No profile passed the filter: there is no average to compute and the
    # legend below would have no bars to reference.
    return

  avg_ff = float(sum(ff_ratio)) / len(ff_ratio)
  avg_fa = float(sum(fa_ratio)) / len(fa_ratio)
  xpos_ff = range(len(ff_ratio))
  # The second series, and the tick labels, sit 0.4 to the right of the first.
  xpos_fa = [x + 0.4 for x in range(len(fa_ratio))]
  tick_pos = [x + 0.4 for x in range(len(fa_ratio))]

  fig = plt.figure(figsize=(21, 14))
  plt.title("Metrics of Users with most followers on Quora")
  bar1 = plt.bar(xpos_ff, ff_ratio, 0.35, color='red')
  bar2 = plt.bar(xpos_fa, fa_ratio, 0.35, color='blue')
  l1 = plt.hlines(avg_ff, -0.5, len(ff_ratio), linestyles='dashed', color='red',
             label='Average Followers / Following ratio')
  l2 = plt.hlines(avg_fa, -0.5, len(ff_ratio), linestyles='dashed',
                  color='blue', label='Average Followers / Answers ratio')
  plt.xlabel('Users')
  plt.ylabel('Ratio Value')
  plt.xticks(tick_pos, xticks, rotation=90)
  plt.legend((bar1[0], bar2[0], l1, l2),
             ("Follower / Following Ratio",
              "Follower / Answer Ratio",
              "Average Followers / Following ratio",
              "Average Followers / Answers ratio"))
  plt.xlim([-0.5, len(ff_ratio)])
  fig.tight_layout()
  plt.savefig('user_metric.png', facecolor='white', edgecolor='black')
def profile_with_max_views(limit=200):
  """Report and chart the profiles with the highest view counts.

  Prints the ranked list (URL and view count) and writes two charts:
    * ``views_per_follower.png`` - views divided by followers for every
      listed profile, with a red line at total views / total followers.
    * ``all_time_views.png`` - view count per profile for all but the two
      top-ranked profiles, with a red line at their average. Skipped when
      fewer than three profiles are available.

  Only the header line is printed when the query returns no profiles.

  Args:
    limit: maximum number of profiles to include (default 200).
  """
  # Materialise once: the rows are iterated, indexed, sliced and plotted.
  profiles = list(
    Profile.select().order_by(Profile.views.desc()).limit(limit))

  total_views = 0
  total_followers = 0

  # The header names the metric the query is actually ranked by (views).
  print('Top %d Writers with most views - ' % limit)
  for profile in profiles:
    u = PROFILE_URL.format(profile.uname)
    print(u'{0} ({1})'.format(u, profile.views))
    # Views are added exactly once per profile so the averages are not
    # inflated.
    total_views += profile.views
    total_followers += profile.followers

  if not profiles:
    # An empty result has nothing to average or plot.
    return

  # A zero follower total (or a single profile with no followers below)
  # is reported as 0 rather than raising ZeroDivisionError.
  average_per_f = total_views / total_followers if total_followers else 0

  # Per Follower Plot
  count = len(profiles)
  positions = range(count)
  fig = plt.figure(figsize=(21, 14))
  plt.title("Views / Follower for Most Viewed Writers")
  ydata = [float(prof.views) / prof.followers if prof.followers else 0.0
           for prof in profiles]
  xticks = [prof.name for prof in profiles]
  plt.xlabel('Profile')
  plt.ylabel('Views per Follower')
  plt.bar(positions, ydata, align='center')
  plt.xticks(positions, xticks, rotation=90)
  plt.hlines(average_per_f, -0.5, count - 0.5, label='Average',
             colors='red')
  plt.xlim([-0.5, count - 0.5])
  fig.tight_layout()
  plt.savefig('views_per_follower.png', facecolor='white', edgecolor='black')

  # All-time views plot. The two top-ranked profiles are left out of this
  # chart and of its average.
  # NOTE(review): presumably they are outliers that would flatten the
  # rest of the bars - confirm the intent.
  if count <= 2:
    return
  remaining = profiles[2:]
  remaining_views = total_views - (profiles[0].views + profiles[1].views)
  average_views = remaining_views / len(remaining)

  positions = range(len(remaining))
  fig = plt.figure(figsize=(21, 14))
  plt.title("All-time Views")
  ydata = [prof.views for prof in remaining]
  xticks = [prof.name for prof in remaining]
  plt.xlabel('Profile')
  plt.ylabel('All Time Answer Views')
  plt.bar(positions, ydata, align='center')
  plt.xticks(positions, xticks, rotation=90)
  plt.hlines(average_views, -0.5, len(remaining) - 0.5, label='Average',
             colors='red')
  plt.xlim([-0.5, len(remaining) - 0.5])
  fig.tight_layout()
  plt.savefig('all_time_views.png', facecolor='white', edgecolor='black')
def profile_with_max_answers(limit=20):
  """Report and chart the profiles with the most answers.

  Prints the ranked list (URL and answer count), followed by the total
  answers, the average answers per user, average views per answer, average
  views per user, and how many listed users appear in
  ``top_writers_2016.json``.

  Two charts are written:
    * ``most_answers.png`` - answer count per profile with a red line at
      the average number of answers.
    * ``view_avg_most_ans.png`` - views divided by answers per profile with
      a red line at total views / total answers.

  Only the header line is printed when the query returns no profiles.

  Args:
    limit: maximum number of profiles to include (default 20).
  """
  # Materialise once: the rows are iterated, counted and plotted below.
  profiles = list(Profile.select().order_by(
    Profile.total_answers.desc()).limit(limit))

  # Usernames listed in the Top Writers file, kept for membership tests.
  with open('top_writers_2016.json', 'r') as fstream:
    top_writers = set(writer['uname'] for writer in json.load(fstream))

  total_answers = 0
  total_views = 0
  top_writer_count = 0

  print('Top %d Writers with most answers - ' % limit)
  for profile in profiles:
    u = PROFILE_URL.format(profile.uname)
    print(u'{0} ({1} answers)'.format(u, profile.total_answers))
    total_answers += profile.total_answers
    total_views += profile.views
    if profile.uname in top_writers:
      top_writer_count += 1

  if not profiles:
    # An empty result has no averages to compute and nothing to plot.
    return

  count = len(profiles)
  # A zero answer total is reported as 0 instead of dividing by it.
  avg_views_per_answer = total_views / total_answers if total_answers else 0
  avg_views_per_user = total_views / count
  avg_ans_per_user = total_answers / count

  print("Total number of answers is %d" % total_answers)
  print("Average number of answers per user is %d" % avg_ans_per_user)
  print("Average number of views per answer is %d" % avg_views_per_answer)
  print("Average all time views for a user is %d" % avg_views_per_user)
  print("Number of Top Writers 2016 in this list is %d" % top_writer_count)

  positions = range(count)
  xticks = [prof.name for prof in profiles]

  # Chart 1: answers written per profile.
  fig = plt.figure(figsize=(21, 14))
  plt.title("Users with most answers on Quora")
  ydata = [prof.total_answers for prof in profiles]
  plt.xlabel('Profile')
  plt.ylabel('Number of answers written')
  plt.bar(positions, ydata, align='center')
  plt.xticks(positions, xticks, rotation=90)
  plt.hlines(avg_ans_per_user, -0.5, count - 0.5,
             label='Average Number of Answers', colors='red')
  plt.xlim([-0.5, count - 0.5])
  plt.legend()
  fig.tight_layout()
  plt.savefig('most_answers.png', facecolor='white', edgecolor='black')

  # Chart 2: views per answer for each profile; a profile without answers
  # is plotted as 0 instead of raising ZeroDivisionError.
  fig = plt.figure(figsize=(21, 14))
  plt.title("Average views on each answer for users with most answers on Quora")
  ydata = [float(prof.views) / prof.total_answers if prof.total_answers
           else 0.0 for prof in profiles]
  plt.xlabel('Profile')
  plt.ylabel('All time views on answers / Number of answers')
  plt.bar(positions, ydata, align='center')
  plt.xticks(positions, xticks, rotation=90)
  plt.hlines(avg_views_per_answer, -0.5, count - 0.5,
             label='Average Number of views / answer', colors='red')
  plt.xlim([-0.5, count - 0.5])
  plt.legend()
  fig.tight_layout()
  plt.savefig('view_avg_most_ans.png', facecolor='white', edgecolor='black')