def _collect_user_rows(u):
    """Build the per-attempt model inputs for one user.

    Returns a tuple ``(S_rows, X_rows, U_rows, Qv_rows, pass_rows)`` with one
    entry per experience vector produced by ``gen_experience`` for ``u``.
    """
    attempts = get_attempts_from_db(u)
    ts_list = list(attempts["timestamp"])
    X_rows = gen_experience(u, ts_list)
    U_rows = gen_success(u, ts_list)
    Qv_rows = encode_q_vectors(attempts)
    # Semi-static features (gen_semi_static) are currently disabled; a single
    # zero per row is used as a placeholder input.
    S_rows = [numpy.zeros(1) for _ in X_rows]
    # Elementwise comparison on the "correct" column, kept as ``== True``.
    pass_rows = list(attempts["correct"] == True)
    return S_rows, X_rows, U_rows, Qv_rows, pass_rows


def main():
    """Train a ZPDPredictor on the first 50 students and evaluate on the next 50.

    Training rows are accumulated user by user and flushed to ``zpdp.train``
    whenever more than 10000 rows are pending, and once more after the last
    training user.  The final flush is keyed on the real size of the training
    slice, so it still happens when fewer than ``n_users`` students exist, and
    no un-trained rows can carry over into the evaluation set.
    """
    user_df = get_user_data("*")
    students = user_df[user_df["role"] == "STUDENT"]
    user_list = list(pandas.unique(students["id"]))
    zpdp = ZPDPredictor()

    n_users = 50
    max_batch_rows = 10000
    train_users = user_list[0:n_users]
    test_users = user_list[n_users:2 * n_users]

    S_list, X_list, U_list = [], [], []
    Qv_list, pass_list, atts_list = [], [], []
    for i_users, u in enumerate(train_users, start=1):
        print(i_users)  # progress indicator
        S_rows, X_rows, U_rows, Qv_rows, pass_rows = _collect_user_rows(u)
        X_list += X_rows
        U_list += U_rows
        Qv_list += Qv_rows
        S_list += S_rows
        pass_list += pass_rows
        atts_list += [numpy.ones(1) for _ in X_rows]

        is_last_user = i_users == len(train_users)
        # Skip the flush when nothing is pending (e.g. the last user has no
        # attempts and the previous batch was already trained).
        if X_list and (len(X_list) > max_batch_rows or is_last_user):
            zpdp.train(
                (numpy.array(S_list), numpy.array(X_list), numpy.array(U_list)),
                numpy.array(Qv_list),
                numpy.array(pass_list),
                numpy.array(atts_list),
            )
            S_list, X_list, U_list = [], [], []
            Qv_list, pass_list, atts_list = [], [], []

    # Evaluation uses its own lists so it never sees training rows.
    S_test, X_test, U_test, Qv_test, pass_test = [], [], [], [], []
    for u in test_users:
        S_rows, X_rows, U_rows, Qv_rows, pass_rows = _collect_user_rows(u)
        X_test += X_rows
        U_test += U_rows
        Qv_test += Qv_rows
        S_test += S_rows
        pass_test += pass_rows

    metrics = zpdp.pass_model.evaluate(
        [
            numpy.array(S_test),
            numpy.array(X_test),
            numpy.array(U_test),
            numpy.array(Qv_test),
        ],
        numpy.array(pass_test),
    )
    print(metrics)
def gen_qhist(psi, ts):
    """Return user ``psi``'s question history up to and including ``ts``.

    The result is a list of ``(question_id, timestamp)`` tuples, in the order
    the attempts are returned by ``get_attempts_from_db``.
    """
    all_attempts = get_attempts_from_db(psi)
    on_or_before = all_attempts["timestamp"] <= ts
    history = all_attempts[on_or_before]
    return list(zip(history["question_id"], history["timestamp"]))
def gen_experience(psi, ts_list):
    """Build cumulative "questions attempted" vectors (inclusive-timestamp variant).

    For each timestamp in ``ts_list`` (processed in ascending order) the
    returned list holds a copy of a vector of length ``len(all_qids)`` in
    which position ``reverse_qid_dict[qid]`` is 1 for every question the user
    has attempted at or before that timestamp.

    NOTE(review): a second ``gen_experience`` is defined later in this file
    and replaces this one when the module is loaded.  That later version
    excludes attempts made exactly at ``ts``; this one includes them.

    Parameters:
        psi: user identifier passed to ``get_attempts_from_db``.
        ts_list: iterable of timestamps comparable with the "timestamp" column.

    Returns:
        list of numpy arrays, one per timestamp, in ascending timestamp order.
    """
    remaining = get_attempts_from_db(psi)
    X_list = []
    X = numpy.zeros(len(all_qids))
    for ts in sorted(ts_list):
        seen_now = remaining[remaining["timestamp"] <= ts]
        for qid in seen_now["question_id"]:
            try:
                qix = reverse_qid_dict[qid]
            except KeyError:
                # Question id not present in the lookup table: report and skip.
                print("UNK Qn ", qid)
                continue
            # A decaying-experience variant (subtract a small amount per step,
            # floored at zero) was tried here and is disabled; experience is
            # a plain 0/1 flag.
            X[qix] = 1
        X_list.append(numpy.copy(X))
        # Already-counted attempts are dropped so each step only scans new rows.
        remaining = remaining[remaining["timestamp"] > ts]
    return X_list
def gen_semi_static(psi, dob_cache, ts_list):
    """Build slowly-changing per-user features for each timestamp.

    For every timestamp in ``ts_list`` (ascending) one array
    ``[age, days, attempts_per_day, success_ratio]`` is produced, computed
    from the user's attempts at or before that timestamp:

    * ``age``: years between ``dob_cache[psi]`` and the timestamp; replaced by
      16.9 when it is not a float, is NaN, or falls outside ``[10, 100)``.
    * ``days``: whole days between the first and last attempt, at least 1.0.
    * ``attempts_per_day``: number of attempts divided by ``days``.
    * ``success_ratio``: correct attempts / attempts, or 0 with no attempts.

    Parameters:
        psi: user identifier passed to ``get_attempts_from_db``.
        dob_cache: mapping from user identifier to date of birth.
        ts_list: iterable of timestamps.

    Returns:
        list of numpy arrays, one per timestamp, in ascending timestamp order.
    """
    default_age = 16.9
    S_list = []
    raw_attempts = get_attempts_from_db(psi)
    for ts in sorted(ts_list):
        xp_atts = 0
        sx = 0
        days = 1.0
        attempts = raw_attempts[raw_attempts["timestamp"] <= ts]

        dob = dob_cache[psi]
        age = (ts - dob).days / 365.242
        # NaN compares False against every bound, so it needs its own check;
        # otherwise a missing date of birth would leak NaN into the features.
        if (
            not isinstance(age, float)
            or numpy.isnan(age)
            or age >= 100
            or age < 10
        ):
            age = default_age

        if not attempts.empty:
            first_seen = attempts["timestamp"].min()
            last_seen = attempts["timestamp"].max()
            span = (last_seen - first_seen).days
            # An undefined (NaN) or sub-day span falls back to one day so the
            # per-day rate below stays finite; nothing here waits on stdin.
            days = 1.0 if (numpy.isnan(span) or span < 1.0) else span
            xp_atts = attempts.shape[0]
            sx = attempts[attempts["correct"] == True].shape[0]

        S_list.append(
            numpy.array(
                [
                    age,
                    days,
                    xp_atts / days,
                    sx / xp_atts if xp_atts else 0,
                ]
            )
        )
    return S_list
def gen_experience(psi, ts_list):
    """Build cumulative "questions attempted" vectors for one user.

    For each timestamp in ``ts_list`` (processed in ascending order) the
    returned list holds a copy of a vector of length ``len(all_qids)`` in
    which position ``reverse_qid_dict[qid]`` is 1 for every question the user
    attempted strictly before that timestamp.

    Parameters:
        psi: user identifier passed to ``get_attempts_from_db``.
        ts_list: iterable of timestamps comparable with the "timestamp" column.

    Returns:
        list of numpy arrays, one per timestamp, in ascending timestamp order.
    """
    remaining = get_attempts_from_db(psi)
    X_list = []
    X = numpy.zeros(len(all_qids))
    for ts in sorted(ts_list):
        earlier = remaining[remaining["timestamp"] < ts]
        for qid in earlier["question_id"]:
            try:
                qix = reverse_qid_dict[qid]
            except KeyError:
                # Question id not present in the lookup table: report and skip.
                print("UNK Qn ", qid)
                continue
            X[qix] = 1
        X_list.append(numpy.copy(X))
        # Already-counted attempts are dropped so each step only scans new rows.
        remaining = remaining[remaining["timestamp"] >= ts]
    return X_list
def gen_success(psi, ts_list):
    """Build cumulative success vectors for one user.

    For each timestamp in ``ts_list`` (processed in ascending order) the
    returned list holds a copy of a vector of length ``len(all_qids)``.  For
    every question answered correctly strictly before the timestamp, position
    ``reverse_qid_dict[qid]`` is set to ``1 / attempts`` where ``attempts`` is
    the number of attempts at that question in the rows examined at this step.

    NOTE(review): rows are dropped from the working frame after each step, so
    the attempt count only covers attempts since the previous timestamp, not
    the user's whole history - confirm this is intended.

    Parameters:
        psi: user identifier passed to ``get_attempts_from_db``.
        ts_list: iterable of timestamps comparable with the "timestamp" column.

    Returns:
        list of numpy arrays, one per timestamp, in ascending timestamp order.
    """
    remaining = get_attempts_from_db(psi)
    U_list = []
    U = numpy.zeros(len(all_qids))
    for ts in sorted(ts_list):
        earlier = remaining[remaining["timestamp"] < ts]
        solved = earlier[earlier["correct"] == True]["question_id"]
        for qid in solved:
            try:
                qix = reverse_qid_dict[qid]
            except KeyError:
                # Question id not present in the lookup table: report and skip.
                print("UNK Qn ", qid)
                continue
            attct = len(earlier[earlier["question_id"] == qid])
            U[qix] = 1.0 / attct
        U_list.append(numpy.copy(U))
        remaining = remaining[remaining["timestamp"] >= ts]
    return U_list
def filter_assignments(assignments, book_only):
    """Flag which assignments are usable, in a new boolean "include" column.

    An assignment is kept (``include`` stays True) when:

    * ``book_only`` is false, or every question on its gameboard has a page
      id (the part before ``"|"``) starting with ``"ch_"`` or ``"ch-i"``; and
    * its group has at least one student, and at least one of those students
      has a non-empty attempt history.

    Rows are flagged rather than removed.  The frame is modified in place and
    also returned.  Rows are visited by index label, so the frame does not
    need a default 0..n-1 index (labels are assumed to be unique).

    Parameters:
        assignments: DataFrame with "group_id" and "gameboard_id" columns.
        book_only: when true, apply the page-id prefix restriction.

    Returns:
        The same DataFrame with the "include" column filled in.
    """
    book_prefixes = ("ch_", "ch-i")

    assignments["include"] = True
    print(assignments.shape)
    gb_question_map = make_gb_question_map()
    # Result is not used here; the call is kept in case it has side effects
    # (e.g. warming a cache) - TODO confirm and remove if it does not.
    get_meta_data()

    for ix in assignments.index:
        gr_id = assignments.loc[ix, "group_id"]
        include = True

        if book_only:
            gb_id = assignments.loc[ix, "gameboard_id"]
            include = all(
                hx.split("|")[0].startswith(book_prefixes)
                for hx in gb_question_map[gb_id]
            )

        if include:
            students = get_student_list([gr_id])
            # any() stops at the first student with attempts, so at most one
            # successful lookup is needed per group.
            include = (not students.empty) and any(
                not get_attempts_from_db(psi).empty
                for psi in students["user_id"]
            )

        if not include:
            assignments.loc[ix, "include"] = False

    print(assignments.shape)
    return assignments
def save_class_report_card(ts, aid, gr_id, S, X, U, A, y, y_preds, slist, q_names_df):
    """Write an Excel report card for one class and one assignment.

    The sheet has a header row, two class-level suggestion rows (the label
    with the highest mean predicted score, and the label most often ranked
    first per student), then one row per student with their age, months of
    activity, attempt and success counts, pages attempted, top topics, last
    ten distinct questions and the individually suggested label.

    Parameters:
        ts: cut-off timestamp; only attempts strictly before it are reported.
        aid: assignment id, used in the output file name.
        gr_id: group id, used in the output file name.
        S: per-student feature rows; ``s[0]`` is reported as age and ``s[1]``
            (divided by 30.44) as months on the platform.
        X: per-student experience vectors.
        U: per-student success vectors.
        A: per-student assignment vectors (iterated in lockstep, not written).
        y: per-student targets (iterated in lockstep, not written).
        y_preds: 2-D array of scores, one row per student, columns indexing
            ``pid_override``.
        slist: student identifiers, in the same order as the rows above.
        q_names_df: DataFrame indexed by question id with a "title" column.

    Side effects:
        Prints summary statistics and saves
        ``./report_cards/<mean months>_<gr_id>_<aid>.xlsx``.
    """
    N = y_preds.shape[0]
    print(N)
    sum_preds = numpy.sum(y_preds, axis=0)
    print("sum of sums", numpy.sum(sum_preds))
    sum_preds = sum_preds / N
    max_sum_ix = sum_preds.argmax()
    max_sum_prob = sum_preds.max()

    # Per-student top label, and a vote across the class.
    max_labs = [pid_override[m] for m in y_preds.argmax(axis=1)]
    vote_ct = Counter(max_labs)
    max_vote_lab = vote_ct.most_common(1)[0][0]
    max_sum_lab = pid_override[max_sum_ix]
    print("max sum lab =", max_sum_lab, max_sum_prob)
    print("votes counted:", vote_ct.most_common(5))
    print("most voted =", max_vote_lab)

    wb = openpyxl.Workbook()
    ws = wb.active

    col_headers = [
        "student", "age", "months_on_isaac", "qns_tried", "successes",
        "prev_assignts", "hexes_attempted", "top_10_topics", "last_10_qns",
        "ISAAC_SUGGESTS", "DIFF: (too easy 1..5 too hard)",
        "TOPIC:(bad 1..3 good)",
    ]
    for c, header in enumerate(col_headers, start=1):
        ws.cell(1, c, header)

    ws.cell(2, 2, ts)
    ws.cell(2, 1, "Classroom sugg'n 1:")
    ws.cell(3, 1, "Classroom sugg'n 2:")
    ws.cell(2, 10, max_sum_lab)
    ws.cell(3, 10, max_vote_lab)

    months_on_list = []
    # Student rows start below the header and the two suggestion rows.
    # ``A`` and ``y`` stay in the zip so the row count is still limited by
    # the shortest input.
    for r, (s, x, u, _a, _t, psi, maxlab) in enumerate(
        zip(S, X, U, A, y, slist, max_labs), start=4
    ):
        atts = get_attempts_from_db(psi)
        atts = atts[atts["timestamp"] < ts]

        # Distinct page ids (the part before "|"), in first-seen order.
        pids = "\n".join(
            map(str, dict.fromkeys(qid.split("|")[0] for qid in atts["question_id"]))
        )

        big5 = get_top_subjs(x, 10)
        if len(big5) > 0:
            big5 = "\n".join(map(str, big5))

        # Last ten distinct questions, shown with their title when one exists.
        last5 = []
        for n in list(pandas.unique(atts["question_id"])[-10:]):
            tit = q_names_df.loc[n, "title"]
            if str(tit) != "nan":
                last5.append("{} ({})".format(tit, n))
            else:
                last5.append(n)
        last5 = "\n".join(map(str, last5))

        months_on = s[1] / 30.44
        months_on_list.append(months_on)

        row_values = [
            psi,
            int(10 * s[0]) / 10.0,
            "{:.1f}".format(months_on),
            str(numpy.sum(x > 0)) + " (" + str(numpy.sum(x)) + ")",
            numpy.sum(u),
            # TODO(review): "prev_assignts" is always blank; the assignment
            # vectors in ``A`` are not reported yet.
            [],
            pids,
            big5,
            last5,
            maxlab,
        ]
        for c, cv in enumerate(row_values, start=1):
            # An empty list marks "nothing to show".  The check is explicit
            # because ``value == []`` is ambiguous for numpy scalars.
            if isinstance(cv, list) and not cv:
                cv = "-"
            ws.cell(r, c, cv)

    # Size each column to its longest line of text.
    for col in ws.columns:
        column = col[0].column
        # Newer openpyxl versions return the column index here, while
        # column_dimensions is keyed by letter.
        if not isinstance(column, str):
            column = openpyxl.utils.get_column_letter(column)
        max_length = 0
        for cell in col:
            cell.alignment = Alignment(horizontal="center", vertical="top")
            longest_line = max(len(line) for line in str(cell.value).split("\n"))
            if longest_line > max_length:
                max_length = longest_line
        ws.column_dimensions[column].width = max_length * 1.2

    # Size each row to its tallest cell, at 11.5 units per line of text.
    for row in ws.rows:
        rowname = row[0].row
        max_height = 0
        for cell in row:
            cell_h = len(str(cell.value).split("\n"))
            if cell_h > max_height:
                max_height = cell_h
        ws.row_dimensions[rowname].height = max_height * 11.5

    months_av = mean(months_on_list)
    wb.save('./report_cards/{:.1f}_{}_{}.xlsx'.format(months_av, gr_id, aid))
# print(rd) student_static[psi] = (rd, ) l_ts = pandas.to_datetime("1970-01-01 00:00:00") l_hexes = [] else: l_ts = last_ts[psi] l_hexes = last_hexes[psi] S, X, U, A = pickle.loads( zlib.decompress(SXUA[psi][l_ts])) # S,X,U,A = copy(S),copy(X),copy(U),copy(A) #make updates # if psi ==118651: # print("birdskoeping") attempts = get_attempts_from_db(psi) attempts = attempts[attempts["timestamp"] < ts] all_wins = list( attempts[(attempts["correct"] == True)]["question_id"]) recent_attempts = attempts[attempts["timestamp"] >= l_ts] # qids = list(set(recent_attempts["question_id"])) qids = list(set(recent_attempts["question_id"])) recent_wins = list(recent_attempts[( recent_attempts["correct"] == True)]["question_id"]) for qid in qids: try: qix = all_qids.index(qid) attct = ( recent_attempts["question_id"] == qid).sum()