Esempio n. 1
0
def get_data(path_out):
  """Plot completion-time histograms for two user groups and save the figure.

  Loads every row of ``test_run`` with a truthy ``t_stop``, splits the users
  into those with fewer than 16 runs and those with 16 or more, and draws one
  bar histogram per group: run duration rounded to whole seconds on the x
  axis, number of runs on the y axis. Each subplot also gets a dashed
  vertical line where the cumulative share of runs first reaches 90% and a
  dash-dotted one where it first reaches 50%.

  Args:
    path_out: Destination handed unchanged to ``figure_save``.

  Returns:
    None. The figure is written through ``figure_save``.
  """
  db = get_db()
  try:
    testruns = db.execute("SELECT * FROM test_run WHERE t_stop").fetchall()
  finally:
    # Release the handle even when the query fails.
    db.close()
  testruns = fix.testruns(testruns)

  def data_get(valid):
    """Return ``(histogram, largest)`` for users whose run count passes *valid*.

    ``histogram`` is a list of ``(seconds, number_of_runs)`` pairs and
    ``largest`` is the pair with the highest count (the last one seen on
    ties), or ``(0, 0)`` when no run qualifies.
    """
    tests_per_user = {}
    for run in testruns:
      user = run['id_user']
      tests_per_user[user] = tests_per_user.get(user, 0) + 1

    # A set keeps the per-run membership test below constant-time.
    valid_users = {u for u, num in tests_per_user.items() if valid(num)}

    run_per_diff = {}
    for run in testruns:
      if run['id_user'] not in valid_users:
        continue
      start, stop = run_to_datetimes(run)
      diff = stop - start
      # NOTE: timedelta.seconds excludes whole days, so a run lasting longer
      # than 24 hours wraps around here.
      diff = round(diff.seconds + diff.microseconds/1e6)
      run_per_diff[diff] = run_per_diff.get(diff, 0) + 1

    largest = (0, 0)
    for pair in run_per_diff.items():
      # "<=" keeps the original tie-breaking: the last maximum wins.
      if largest[1] <= pair[1]:
        largest = pair
    return list(run_per_diff.items()), largest

  # One histogram per user group, selected by the user's total run count.
  group_filters = [
    lambda num: num < 16,
    lambda num: num > 15,
  ]
  list_data = [data_get(valid=group_filter) for group_filter in group_filters]

  fig, axs = plt.subplots(len(list_data), 1, sharex=True)
  fig.subplots_adjust(hspace=0)

  handlers = []
  list_data = list(zip(list_data, iter_colors, iter_markers('bar')))
  xticks = [1] + list(range(10, 150, 10))
  lines_90 = []
  lines_50 = []
  for i, ((d, largest), color, hatch) in enumerate(list_data):
    ax = axs[i]
    ax.set_ylim(top=100)
    line_90 = 0
    line_50 = 0
    sum_y = 0
    xs, ys = zip(*d)
    total_y = sum(ys)
    # Walk the bins in ascending duration and remember where the cumulative
    # share of runs first crosses each threshold.
    for x, y in sorted(d):
      sum_y += y
      if sum_y/total_y >= 0.9 and not line_90:
        line_90 = x + 1
        lines_90.append(line_90)
      if sum_y/total_y >= 0.5 and not line_50:
        line_50 = x + 1
        lines_50.append(line_50)
    handlers.append(ax.bar(
      xs, ys,
      color=color,
      hatch=hatch,
      alpha=0.99,
      edgecolor='black',
    ))

    # Regular ticks every 20, except that a tick closer than 10 to the
    # tallest bar is replaced by that bar's exact height.
    yticks = []
    for tick in range(20, 120, 20):
      if abs(largest[1] - tick) < 10:
        yticks.append(largest[1])
      else:
        yticks.append(tick)
    ax.set_yticks(yticks)

  # Vertical threshold lines; the handles of the last ones drawn are reused
  # for the legend entries.
  guide_ys = range(0, 120, 20)
  h90 = ""
  h50 = ""
  for i, line in enumerate(lines_90):
    h90 = axs[i].plot([line]*len(guide_ys), guide_ys, "--", zorder=-10,
                      alpha=0.8, linewidth=1)[0]
  for i, line in enumerate(lines_50):
    h50 = axs[i].plot([line]*len(guide_ys), guide_ys, "-.", zorder=-10,
                      alpha=0.8, linewidth=1)[0]

  fig.legend(
    [
      *handlers,
      h90,
      h50,
    ],
    [
      # Raw strings: "\l" and "\g" are not valid escape sequences.
      r'#$r\leq15$',
      r'#$r\geq16$',
      '90th percentile',
      '50th percentile',
    ],
    loc='upper right',
    bbox_to_anchor=(0.88, 1.00),
    fontsize='small',
  )

  # Faint dotted grid: one line per x tick plus a fainter one 5 units after
  # every tick but the last.
  for ax, _ in zip(axs, list_data):
    ax.vlines(xticks, 0, 100, alpha=0.2, linestyles='dotted', linewidth=1,
              zorder=-20)
    ax.vlines([v + 5 for v in xticks[:-1]], 0, 100, alpha=0.1,
              linestyles='dotted', linewidth=1, zorder=-20)

  fig.set_size_inches(figure_units['size'])
  plt.xticks(xticks)
  # A frameless axes spanning the whole figure carries the shared title and
  # axis labels; its own ticks and tick labels are hidden.
  fig.add_subplot(111, frameon=False)
  plt.tick_params(labelcolor='none', top=False, bottom=False, right=False,
                  left=False)
  plt.title('Histogram for all completion times, regular and outlier')
  plt.xlabel('Seconds')
  plt.ylabel('Size of group')
  figure_save(fig, path_out)

  return None
def get_data():
  """Build the horizontal-bar description of average and median run times.

  Loads every row of ``test_run`` with a truthy ``t_stop`` and, for each task
  type in ``order`` (reversed), computes the average and the median run
  duration in seconds, separately for users with fewer than 16 runs and for
  users with 16 or more.

  Returns:
    A dict with the keys ``method``, ``data``, ``yticks``, ``legend``,
    ``ylabel``, ``xlabel`` and ``kwargs``. ``data`` holds four ``zip``
    iterators of (positions, values), in the same order as ``legend``.

  Raises:
    KeyError: if a task type has no run in one of the two user groups.
  """
  db = get_db()
  try:
    testruns = db.execute("SELECT * FROM test_run WHERE t_stop").fetchall()
  finally:
    # Release the handle even when the query fails.
    db.close()
  testruns = fix.testruns(testruns)

  # Bar height; also the vertical offset between neighbouring bars.
  dx = 0.9/4

  labels = list(reversed(order))

  def stats_for(valid):
    """Return alternating (position, average) / (position, median) pairs.

    Only runs of users whose total run count satisfies *valid* are used.
    """
    tests_per_user = {}
    for run in testruns:
      user = run['id_user']
      tests_per_user[user] = tests_per_user.get(user, 0) + 1

    # A set keeps the per-run membership test below constant-time.
    valid_users = {u for u, num in tests_per_user.items() if valid(num)}

    run_per_type = {}
    for run in testruns:
      if run['id_user'] not in valid_users:
        continue
      start, stop = run_to_datetimes(run)
      run_per_type.setdefault(run['name'], []).append(stop - start)

    stats = []
    for x, label in enumerate(labels):
      times = sorted(run_per_type[label])
      count = len(times)
      average = sum(d.seconds + d.microseconds/1e6 for d in times)/count
      # The parity test used to read "len_times-1%2", which parses as
      # "len_times - 1": even-sized samples never averaged the two middle
      # values and a single-run sample raised IndexError.
      middle = (count - 1)//2
      if count % 2:
        median = times[middle].seconds
      else:
        median = (times[middle] + times[middle + 1]).seconds/2
      stats.append((x + dx/2, average))
      stats.append((x - dx/2, median))
    return stats

  data = stats_for(valid=lambda num: num < 16)
  # Shift the second group down by two bar heights so its bars do not overlap
  # the first group's.
  data_outliers = [
    (x - dx*2, v) for x, v in stats_for(valid=lambda num: num > 15)
  ]

  # Even entries are averages, odd entries medians.
  data = [
    zip(*data[0::2]),
    zip(*data[1::2]),
    zip(*data_outliers[0::2]),
    zip(*data_outliers[1::2]),
  ]

  return {
    'method': 'barh',
    'data': data,
    'yticks': (
      [x - dx for x in range(len(labels))],
      [label.title() for label in labels],
    ),
    'legend': [
      # Raw strings: "\l" and "\g" are not valid escape sequences.
      r'Average #$r\leq15$',
      r'Median #$r\leq15$',
      r'Average #$r\geq16$',
      r'Median #$r\geq16$',
    ],
    'ylabel': "Task type",
    'xlabel': "Seconds",
    'kwargs': {
      'barh': {
        'height': dx,
      },
      'legend': {
        'fontsize': 'small',
      },
    },
  }
Esempio n. 3
0
def get_data():
    """Print demographic and timing summaries for the test participants.

    Reads the ``test_user``, ``answer`` and ``test_run`` tables (runs only
    when ``t_stop`` is truthy) and prints to stdout:

    * the average age and the "identifies as" distribution, once over all
      answers to the initial questions ("pre") and once restricted to users
      who answered the first retained post question ("post");
    * the average, median, fastest and slowest time between a user's
      registration and the end of their last run, for the "post" users.
    """
    import datetime as dt

    db = get_db()
    try:
        users = db.execute("SELECT * FROM test_user").fetchall()
        answers = db.execute("SELECT * FROM answer").fetchall()
        testruns = db.execute(
            "SELECT * FROM test_run WHERE t_stop").fetchall()
    finally:
        # Release the handle even when a query fails.
        db.close()

    testruns = fix.testruns(testruns)

    answers_per_question = {}
    for answer in answers:
        answers_per_question.setdefault(answer['question'], []).append(answer)

    # Initial-questionnaire questions; 'inti' is accepted next to 'init'.
    # NOTE(review): presumably a misspelled prefix stored in the data.
    questions = [
        q for q in answers_per_question.keys() if q[:4] in ['init', 'inti']
    ]

    # Strip everything up to the first underscore, then map to a print label.
    q_transform = {q: q.split('_', 1)[-1] for q in questions}
    q_final = {
        'age': 'age',
        'identifies': 'identifies as',
        'screen_size': 'screen size',
        'device_type': 'input type',
    }

    # Not read anywhere in the code below.
    categories = {
        'identifies': ['female', 'male', 'other'],
        'device_type': ['mouse', 'trackpad', 'touch', 'other'],
        'screen': ['desktop', 'laptop', 'tablet', 'mobile'],
    }

    # NOTE(review): the first five and the last non-initial question are
    # dropped; the reason is not visible here.
    questions_post = [
        q for q in answers_per_question.keys()
        if q[:4] not in ['init', 'inti']
    ][5:][:-1]
    completed_post = {
        a['str_id']
        for a in answers_per_question[questions_post[0]]
    }

    def tally(keep):
        """Count answers per label and value for the initial questions.

        Only answer rows for which ``keep(row)`` is true are counted.
        """
        counts = {}
        for q, aans in answers_per_question.items():
            if q not in questions:
                continue
            label = q_final[q_transform[q]]
            for a in aans:
                if not keep(a):
                    continue
                per_answer = counts.setdefault(label, {})
                value = a['answer']
                per_answer[value] = per_answer.get(value, 0) + 1
        return counts

    ans_data_per_q = tally(lambda a: True)
    ans_data_per_q_post = tally(lambda a: a['str_id'] in completed_post)

    # The label travels with each dict. Deriving it from
    # "d == ans_data_per_q" printed the post dataset as "pre" whenever both
    # tallies held equal counts.
    datasets = [('pre', ans_data_per_q), ('post', ans_data_per_q_post)]

    for dataset, d in datasets:
        sum_ages = 0
        sum_num = 0
        for age, num in d['age'].items():
            sum_num += int(num)
            sum_ages += int(age) * int(num)
        print(
            f"Average age ({dataset}): {sum_ages}/{sum_num}: {sum_ages/sum_num}"
        )

    for dataset, d in datasets:
        gen_total = sum(d['identifies as'].values())
        for ident, num in d['identifies as'].items():
            percent = num / gen_total * 100
            print(
                f"Identifies as ({dataset}): {ident}: {num}/{gen_total} ({percent})"
            )

    timestamp_format = '%Y-%m-%d %H:%M:%S.%f%Z'

    # Latest t_stop per user, for users in completed_post only.
    last_runs = {}
    for run in testruns:
        user = run['id_user']
        if user not in completed_post:
            continue
        t_stop = datetime.strptime(run['t_stop'], timestamp_format)
        previous = last_runs.get(user)
        if not previous or t_stop > previous:
            last_runs[user] = t_stop

    users = {u['str_id']: u for u in users}
    diffs = []
    diffs_total = dt.timedelta()
    for user, last in last_runs.items():
        registered = datetime.strptime(
            users[user]['t_created'], timestamp_format)
        # Values are replaced in place; the key set does not change, so this
        # is safe while iterating.
        last_runs[user] = (registered, last)
        diffs.append(last - registered)
        diffs_total += diffs[-1]

    diffs = sorted(diffs)
    print(
        f"Average time from register to last question: {(diffs_total/len(diffs))}"
    )
    print(len(diffs))
    # Upper-middle element for even-sized samples.
    print(f"Median time: {diffs[len(diffs) // 2]}")
    print(f"Fastest: {diffs[0]}")
    print(f"Slowest: {diffs[-1]}")