Ejemplo n.º 1
0
def answers_with_max_views(limit=20):
    """Print the most viewed answers, highest view count first.

    Queries ``Answer`` ordered by ``views`` descending, capped at ``limit``
    rows, then prints a header followed by one ``<url> (<views> views)``
    line per answer.

    Args:
        limit: Maximum number of answers to list (default 20).
    """
    by_views = Answer.views.desc()
    top_answers = Answer.select().order_by(by_views).limit(limit)

    print("Top %d Most viewed answers - " % limit)
    line_format = "{0} ({1} views)"
    for entry in top_answers:
        # The URL is built from the answer's question and its writer's uname.
        url = ANSWER_URL.format(entry.question, entry.writer_uname)
        print(line_format.format(url.encode("UTF-8"), entry.views))
Ejemplo n.º 2
0
def answers_with_no_upvotes(limit=20):
    """Print the most viewed answers that have zero upvotes.

    Selects ``Answer`` rows whose ``upvotes`` equal 0, ordered by ``views``
    descending and capped at ``limit`` rows, then prints a header followed
    by one ``<url> (<views> views)`` line per answer.

    Args:
        limit: Maximum number of answers to list (default 20).
    """
    query = Answer.select()
    query = query.where(Answer.upvotes == 0)
    query = query.order_by(Answer.views.desc())
    query = query.limit(limit)

    print("Answers max views  but no upvotes - ")
    for row in query:
        link = ANSWER_URL.format(row.question, row.writer_uname).encode("UTF-8")
        summary = "{0} ({1} views)".format(link, row.views)
        print(summary)
Ejemplo n.º 3
0
def answers_with_max_views(limit=20):
  """List the answers with the highest view counts.

  Builds an ``Answer`` query sorted by ``views`` in descending order and
  limited to ``limit`` rows, prints a header, then prints each answer's URL
  together with its view count.

  Args:
    limit: Maximum number of answers to list (default 20).
  """
  most_viewed = Answer.select().order_by(Answer.views.desc()).limit(limit)
  header = 'Top %d Most viewed answers - ' % limit

  print(header)
  for item in most_viewed:
    raw_url = ANSWER_URL.format(item.question, item.writer_uname)
    encoded_url = raw_url.encode("UTF-8")
    print("{0} ({1} views)".format(encoded_url, item.views))
Ejemplo n.º 4
0
def answers_with_no_upvotes(limit=20):
  """List the most viewed answers that never received an upvote.

  Filters ``Answer`` on ``upvotes == 0``, sorts by ``views`` descending and
  keeps at most ``limit`` rows. Prints a header, then each answer's URL with
  its view count.

  Args:
    limit: Maximum number of answers to list (default 20).
  """
  has_no_upvotes = Answer.upvotes == 0
  unvoted = (Answer.select()
             .where(has_no_upvotes)
             .order_by(Answer.views.desc())
             .limit(limit))

  print('Answers max views  but no upvotes - ')
  row_format = "{0} ({1} views)"
  for ans in unvoted:
    address = ANSWER_URL.format(ans.question, ans.writer_uname)
    print(row_format.format(address.encode("UTF-8"), ans.views))
Ejemplo n.º 5
0
def answers_with_max_upvotes(limit=20):
  """Print statistics for the most upvoted answers and plot views vs upvotes.

  Fetches up to ``limit`` answers ordered by upvotes (descending) and prints:
  one line per answer (URL, upvotes, views, views per upvote), totals and
  averages, the number of entries in ``top_writers_2016.json`` whose
  ``uname`` is among the writers of these answers, and the average followers
  and answer counts read from those writers' ``Profile`` rows. Finally a
  views-vs-upvotes scatter plot is saved to ``view_upvote.png``.

  Averages are taken over the number of answers actually returned, which can
  be smaller than ``limit``. When no answers are returned, only the header
  and a short notice are printed.

  Args:
    limit: Maximum number of answers to fetch (default 20).
  """
  # Materialise once: the rows are walked again for the plot below.
  answers = list(Answer.select().order_by(Answer.upvotes.desc()).limit(limit))

  print('Top %d Most upvoted answers - ' % limit)
  if not answers:
    # Every average below would otherwise divide by zero.
    print("No answers found")
    return
  answer_count = len(answers)

  total_views = 0
  total_upvotes = 0
  writer = {}  # writer uname -> number of answers in this list
  for answer in answers:
    u = ANSWER_URL.format(answer.question, answer.writer_uname).encode("UTF-8")
    # An answer without upvotes has no meaningful views-per-upvote ratio.
    ratio = answer.views / answer.upvotes if answer.upvotes else 'n/a'
    print("{0} ({1}, {2}, {3})".format(u, answer.upvotes, answer.views, ratio))
    total_upvotes += answer.upvotes
    total_views += answer.views
    writer[answer.writer_uname] = writer.get(answer.writer_uname, 0) + 1

  print("Total Views = {0}".format(total_views))
  print("Total Upvotes = {0}".format(total_upvotes))
  # Divide by the rows actually fetched; fewer than `limit` may exist.
  print("Average Views = {0}".format(total_views / answer_count))
  print("Average Upvotes = {0}".format(total_upvotes / answer_count))
  if total_views:
    avg_up = (float(total_upvotes) / float(total_views)) * 100
  else:
    avg_up = 0.0
  print("On an average %.2f viewers upvoted the answer" % avg_up)

  # Writer Stat
  with open('top_writers_2016.json', 'r') as fstream:
    writer_list = json.load(fstream)
  notw = sum(1 for w in writer_list if w['uname'] in writer)
  print("{0} People on this list are Top Writers(2016)".format(notw))
  # `writer` is non-empty here because at least one answer was fetched.
  writer_count = len(writer)
  print("Total number of unique writers is {0}".format(writer_count))
  total_followers = 0
  total_answers = 0
  for uname in writer:
    profile = Profile.get(Profile.uname == uname)
    total_followers += int(profile.followers)
    total_answers += int(profile.total_answers)
  print("Average number of followers of each {0}".format(
    total_followers / writer_count))
  print("Average number of answers written by each is {}".format(
    total_answers / writer_count))

  # Plotting Graph
  figure = mpplt.figure(figsize=(10, 10))
  plt = figure.add_subplot(1, 1, 1)
  plt.set_title("Views vs Upvote")
  plt.plot([answer.views for answer in answers],
           [answer.upvotes for answer in answers],
           '.', color='green')
  plt.set_xlabel('Views')
  plt.ticklabel_format(style='sci', axis='x', scilimits=(0,0))
  plt.ticklabel_format(style='sci', axis='y', scilimits=(0,0))
  plt.set_xlim([0, 1500000])
  plt.set_ylim([10000, 25000])
  plt.set_ylabel('Upvotes')
  figure.savefig('view_upvote.png', facecolor='white', edgecolor='black')
Ejemplo n.º 6
0
def answers_with_max_upvotes(limit=20):
    """Print statistics for the most upvoted answers and plot views vs upvotes.

    Fetches up to ``limit`` answers ordered by upvotes (descending) and
    prints: one line per answer (URL, upvotes, views, views per upvote),
    totals and averages, the number of entries in ``top_writers_2016.json``
    whose ``uname`` is among the writers of these answers, and the average
    followers and answer counts read from those writers' ``Profile`` rows.
    Finally a views-vs-upvotes scatter plot is saved to ``view_upvote.png``.

    Averages are taken over the number of answers actually returned, which
    can be smaller than ``limit``. When no answers are returned, only the
    header and a short notice are printed.

    Args:
        limit: Maximum number of answers to fetch (default 20).
    """
    # Materialise once: the rows are walked again for the plot below.
    answers = list(Answer.select().order_by(Answer.upvotes.desc()).limit(limit))

    print("Top %d Most upvoted answers - " % limit)
    if not answers:
        # Every average below would otherwise divide by zero.
        print("No answers found")
        return
    answer_count = len(answers)

    total_views = 0
    total_upvotes = 0
    writer = {}  # writer uname -> number of answers in this list
    for answer in answers:
        u = ANSWER_URL.format(answer.question, answer.writer_uname).encode("UTF-8")
        # An answer without upvotes has no meaningful views-per-upvote ratio.
        ratio = answer.views / answer.upvotes if answer.upvotes else "n/a"
        print("{0} ({1}, {2}, {3})".format(u, answer.upvotes, answer.views, ratio))
        total_upvotes += answer.upvotes
        total_views += answer.views
        writer[answer.writer_uname] = writer.get(answer.writer_uname, 0) + 1

    print("Total Views = {0}".format(total_views))
    print("Total Upvotes = {0}".format(total_upvotes))
    # Divide by the rows actually fetched; fewer than `limit` may exist.
    print("Average Views = {0}".format(total_views / answer_count))
    print("Average Upvotes = {0}".format(total_upvotes / answer_count))
    if total_views:
        avg_up = (float(total_upvotes) / float(total_views)) * 100
    else:
        avg_up = 0.0
    print("On an average %.2f viewers upvoted the answer" % avg_up)

    # Writer Stat
    with open("top_writers_2016.json", "r") as fstream:
        writer_list = json.load(fstream)
    notw = sum(1 for w in writer_list if w["uname"] in writer)
    print("{0} People on this list are Top Writers(2016)".format(notw))
    # `writer` is non-empty here because at least one answer was fetched.
    writer_count = len(writer)
    print("Total number of unique writers is {0}".format(writer_count))
    total_followers = 0
    total_answers = 0
    for uname in writer:
        profile = Profile.get(Profile.uname == uname)
        total_followers += int(profile.followers)
        total_answers += int(profile.total_answers)
    print("Average number of followers of each {0}".format(total_followers / writer_count))
    print("Average number of answers written by each is {}".format(total_answers / writer_count))

    # Plotting Graph
    figure = mpplt.figure(figsize=(10, 10))
    plt = figure.add_subplot(1, 1, 1)
    plt.set_title("Views vs Upvote")
    plt.plot([answer.views for answer in answers], [answer.upvotes for answer in answers], ".", color="green")
    plt.set_xlabel("Views")
    plt.ticklabel_format(style="sci", axis="x", scilimits=(0, 0))
    plt.ticklabel_format(style="sci", axis="y", scilimits=(0, 0))
    plt.set_xlim([0, 1500000])
    plt.set_ylim([10000, 25000])
    plt.set_ylabel("Upvotes")
    figure.savefig("view_upvote.png", facecolor="white", edgecolor="black")
Ejemplo n.º 7
0
                Profile.last_parsed <= old_time).limit(max_crawl -
                                                       total_parsing)
            total_parsing += len(old_profiles)
            print "Number of Profiles to Crawl - ", len(old_profiles)
            for profile in old_profiles:
                try:
                    parse_profile(profile)
                except Exception, err:  #pylint-diasble:
                    print err
                    traceback.print_exc(file=sys.stdout)
                    pass
            if total_parsing >= max_crawl: break

        if not args.no_answer:
            # Parse Old Answers
            old_time = datetime.datetime.now() - datetime.timedelta(days=7)
            old_answers = Answer.select().where(
                Answer.last_parsed <= old_time).limit(max_crawl -
                                                      total_parsing)
            total_parsing += len(old_answers)
            print "Number of Answers to Crawl - ", len(old_answers)
            for answer in old_answers:
                try:
                    parse_answer(answer)
                except Exception, err:
                    print err
                    traceback.print_exc(file=sys.stdout)
                    pass

    print '\n'
      old_time = datetime.datetime.now() - datetime.timedelta(days=7)
      old_profiles = Profile.select().where(
        Profile.last_parsed <= old_time).limit(max_crawl - total_parsing)
      total_parsing += len(old_profiles)
      print "Number of Profiles to Crawl - ", len(old_profiles)
      for profile in old_profiles:
        try:
          parse_profile(profile)
        except Exception, err: #pylint-diasble:
          print err
          traceback.print_exc(file=sys.stdout)
          pass
      if total_parsing >= max_crawl: break

    if not args.no_answer:
      # Parse Old Answers
      old_time = datetime.datetime.now() - datetime.timedelta(days=7)
      old_answers = Answer.select().where(
        Answer.last_parsed <= old_time).limit(max_crawl - total_parsing)
      total_parsing += len(old_answers)
      print "Number of Answers to Crawl - ", len(old_answers)
      for answer in old_answers:
        try:
          parse_answer(answer)
        except Exception, err:
          print err
          traceback.print_exc(file=sys.stdout)
          pass

  print '\n'
Ejemplo n.º 9
0
def best_quora_answers(limit):
  """Rank answers with at least 1000 upvotes by a quality index and chart them.

  For every answer with ``upvotes >= 1000`` a quality index is computed as
  the sum of three terms:

    * the writer's average views per answer divided by this answer's views,
    * this answer's upvotes divided by the average upvotes of all selected
      answers,
    * ``upvotes * 50 / views`` for this answer.

  The top ``limit`` answers by that index are printed together with writer
  statistics and averages, and a bar chart is saved to
  ``quality_answers.png``: quality index above the axis and upvote count
  below it, each min-max scaled.

  Nothing is ranked or plotted when no answer qualifies or when the effective
  limit is zero (a negative ``limit`` is treated as zero).

  Args:
    limit: Maximum number of answers to report; clamped to the number of
      qualifying answers.
  """
  answers = list(Answer.select().where(Answer.upvotes >= 1000))
  if not answers:
    # The average below would otherwise divide by zero.
    print("No answers with at least 1000 upvotes to evaluate")
    return
  avg_upvotes = sum(answer.upvotes for answer in answers) / float(len(answers))

  print("Average Upvotes = %d" % avg_upvotes)
  print("%d answers to evaluate = " % len(answers))
  ranked = []  # Each item = (answer, quality index)
  for answer in answers:
    author = answer.writer
    view_avg = float(author.views) / author.total_answers
    quality_index = view_avg / answer.views
    quality_index += answer.upvotes / avg_upvotes
    quality_index += float(answer.upvotes * 50) / float(answer.views)
    ranked.append((answer, quality_index))

  ranked.sort(key=lambda pair: pair[1], reverse=True)
  # Clamp to [0, number of ranked answers] so slicing and averages are sane.
  limit = max(0, min(limit, len(ranked)))
  print("Printing %d answers" % limit)
  if limit == 0:
    # Averages and the chart are undefined for an empty selection.
    return

  print("Top %d answers on Quora are :" % limit)
  ranked = ranked[:limit]
  total_qi = 0
  total_views = 0
  writer = {}  # writer uname -> number of answers in the top list
  upvote_count = []
  for answer, quality_index in ranked:
    u = ANSWER_URL.format(answer.question, answer.writer_uname)
    print(u"%s (%.2f, %d, %d)" % (u, quality_index, answer.views,
                                  answer.upvotes))
    total_qi += quality_index
    total_views += answer.views
    upvote_count.append(answer.upvotes)
    writer[answer.writer_uname] = writer.get(answer.writer_uname, 0) + 1

  with open('top_writers_2016.json', 'r') as fstream:
    writer_list = json.load(fstream)
  notw = sum(1 for w in writer_list if w['uname'] in writer)
  print("{0} People on this list are Top Writers(2016)".format(notw))
  sorted_writer = sorted(writer.items(), key=operator.itemgetter(1),
                         reverse=True)
  # Single-argument call form prints the same under Python 2 and Python 3.
  print(sorted_writer[:10])
  print("Total number of unique writers is {0}".format(len(sorted_writer)))

  avg_qi = total_qi / limit
  # Must be summed before upvote_count is rescaled for plotting below.
  total_upvotes = sum(upvote_count)
  avg_upvote = total_upvotes / limit
  avg_view = total_views / limit
  print("Average of Quality Index = %.2f" % avg_qi)
  print("Average number of views = %d" % avg_view)
  print("Average number of upvotes = %d" % avg_upvote)

  plt.figure(figsize=(21, 14))
  plt.title("Quality Answers on Quora (Feb 2016)")

  # Scaling quality
  qualities = [quality_index for _, quality_index in ranked]
  max_q = max(qualities)
  min_q = min(qualities)
  diff_y = max_q - min_q
  # A single answer (or identical scores) has zero range; divide by 1 then
  # so every bar scales to 0 instead of raising ZeroDivisionError.
  scale_y = diff_y or 1
  ydata = [(quality_index - min_q) / scale_y for quality_index in qualities]
  xticks = [str(i) for i in range(1, limit + 1)]
  plt.xlabel('Answer Rank')
  plt.ylabel('Quality Index')

  # Scaling Upvote Count
  max_up = max(upvote_count)
  min_up = min(upvote_count)
  diff = max_up - min_up
  scale_up = diff or 1  # same zero-range guard as for the quality index
  upvote_count = [float(up - min_up) / scale_up for up in upvote_count]
  # Positive ticks are labelled with quality values, negative ticks with
  # upvote counts, mapping the scaled axis back to original units.
  yvals = []
  yticks = []
  for i in range(1, 10):
    yvals.append(i * 0.1)
    yticks.append("%.2f" % (min_q + (i * 0.1) * diff_y))
    yvals.append(i * -0.1)
    yticks.append(min_up + (i * 0.1) * diff)
  plt.bar(range(limit), ydata, align='center', color='y')
  plt.bar(range(limit), [-up for up in upvote_count], align='center', color='g')
  plt.xticks(range(limit), xticks)
  plt.yticks(yvals, yticks)
  plt.hlines(sum(ydata) / float(limit), -0.5, limit - 0.5,
             label='Average Quality Index', colors='b')
  plt.hlines(-sum(upvote_count) / float(limit), -0.5, limit - 0.5,
             label='Average Upvote Count', colors='r')
  plt.xlim([-0.5, limit - 0.5])
  plt.ylim([-1.1, 1.1])
  plt.legend()
  plt.tight_layout()
  plt.savefig('quality_answers.png', facecolor='white', edgecolor='black')
Ejemplo n.º 10
0
def answers_with_max_upvotes(limit=20):
  """Print statistics for the most upvoted answers and plot views vs upvotes.

  Fetches up to ``limit`` answers ordered by upvotes (descending) and prints:
  one line per answer (URL, upvotes, views, views per upvote), a "Max - "
  line each time an answer exceeds the highest view count seen so far, totals
  and averages, the number of entries in ``top_writers_2016.json`` whose
  ``uname`` is among the writers of these answers, the ten writers with the
  most answers in the list, and the average followers and answer counts read
  from those writers' ``Profile`` rows. Finally a views-vs-upvotes scatter
  plot is saved to ``view_upvote.png``.

  Averages are taken over the number of answers actually returned, which can
  be smaller than ``limit``. When no answers are returned, only the header
  and a short notice are printed.

  Args:
    limit: Maximum number of answers to fetch (default 20).
  """
  # Materialise once: the rows are walked again for the plot below.
  answers = list(Answer.select().order_by(Answer.upvotes.desc()).limit(limit))

  print('Top %d Most upvoted answers - ' % limit)
  if not answers:
    # Every average below would otherwise divide by zero.
    print("No answers found")
    return
  answer_count = len(answers)

  total_views = 0
  total_upvotes = 0
  writer = {}  # writer uname -> number of answers in this list
  max_views = 0
  for answer in answers:
    u = ANSWER_URL.format(answer.question, answer.writer_uname).encode("UTF-8")
    # An answer without upvotes has no meaningful views-per-upvote ratio.
    ratio = answer.views / answer.upvotes if answer.upvotes else 'n/a'
    print("{0} ({1}, {2}, {3})".format(u, answer.upvotes, answer.views, ratio))
    total_upvotes += answer.upvotes
    total_views += answer.views
    writer[answer.writer_uname] = writer.get(answer.writer_uname, 0) + 1
    if answer.views > max_views:
      max_views = answer.views
      # Single formatted argument keeps the output identical to the former
      # `print "Max - ", u, answer.views` statement (note the double space).
      print("Max -  {0} {1}".format(u, answer.views))

  print("Total Views = {0}".format(total_views))
  print("Total Upvotes = {0}".format(total_upvotes))
  # Divide by the rows actually fetched; fewer than `limit` may exist.
  print("Average Views = {0}".format(total_views / answer_count))
  print("Average Upvotes = {0}".format(total_upvotes / answer_count))
  if total_views:
    avg_up = (float(total_upvotes) / float(total_views)) * 100
  else:
    avg_up = 0.0
  print("On an average %.2f viewers upvoted the answer" % avg_up)

  # Writer Stat
  with open('top_writers_2016.json', 'r') as fstream:
    writer_list = json.load(fstream)
  notw = sum(1 for w in writer_list if w['uname'] in writer)
  print("{0} People on this list are Top Writers(2016)".format(notw))
  sorted_writer = sorted(writer.items(), key=operator.itemgetter(1),
                         reverse=True)
  print(sorted_writer[:10])
  # Non-empty here because at least one answer was fetched.
  writer_count = len(sorted_writer)
  print("Total number of unique writers is {0}".format(writer_count))
  total_followers = 0
  total_answers = 0
  for uname, _ in sorted_writer:
    profile = Profile.get(Profile.uname == uname)
    total_followers += int(profile.followers)
    total_answers += int(profile.total_answers)
  print("Average number of followers of each {0}".format(
    total_followers / writer_count))
  print("Average number of answers written by each is {}".format(
    total_answers / writer_count))

  # Plotting Graph
  figure = plt.figure(figsize=(10, 10))
  splt = figure.add_subplot(1, 1, 1)
  splt.set_title("Views vs Upvote")
  splt.plot([answer.views for answer in answers],
            [answer.upvotes for answer in answers],
            '.', color='green')
  splt.set_xlabel('Views')
  splt.ticklabel_format(style='sci', axis='x', scilimits=(0,0))
  splt.ticklabel_format(style='sci', axis='y', scilimits=(0,0))
  splt.set_xlim([0, 1500000])
  splt.set_ylim([10000, 25000])
  splt.set_ylabel('Upvotes')
  figure.tight_layout()
  figure.savefig('view_upvote.png', facecolor='white', edgecolor='black')