Esempio n. 1
0
def _collect_user_rows(u):
    """Build the per-attempt model inputs for a single user.

    Returns a 5-tuple ``(S, X, U, Qv, passed)``:

    * ``S``      -- one ``numpy.zeros(1)`` placeholder per experience row,
    * ``X``      -- output of ``gen_experience`` for the user's timestamps,
    * ``U``      -- output of ``gen_success`` for the same timestamps,
    * ``Qv``     -- output of ``encode_q_vectors`` for the user's attempts,
    * ``passed`` -- list of booleans, ``attempts["correct"] == True``.

    NOTE(review): ``gen_experience``/``gen_success`` are called with the
    timestamps in the order the attempts come back from the DB, while
    ``passed`` follows the attempt order directly. This presumably relies
    on the attempts already being sorted by timestamp -- confirm.
    """
    attempts = get_attempts_from_db(u)
    ts_list = list(attempts["timestamp"])
    X = gen_experience(u, ts_list)
    U = gen_success(u, ts_list)
    Qv = encode_q_vectors(attempts)
    S = [numpy.zeros(1) for _ in X]
    # Element-wise pandas comparison; must stay `== True`, not `is True`.
    passed = list(attempts["correct"] == True)
    return S, X, U, Qv, passed


def main():
    """Train a ``ZPDPredictor`` on one slice of students and evaluate on the next.

    The first ``n_users`` students (role ``"STUDENT"``) are used for
    training, the following ``n_users`` for evaluation. Training rows are
    accumulated across users and flushed to ``zpdp.train`` whenever more
    than 10000 rows are pending, and once more after the last training
    user so that no rows are left untrained. The evaluation metrics
    returned by ``zpdp.pass_model.evaluate`` are printed.
    """
    user_df = get_user_data("*")
    user_list = list(
        pandas.unique(user_df[(user_df["role"] == "STUDENT")]["id"]))
    zpdp = ZPDPredictor()

    n_users = 50
    train_users = user_list[0:n_users]
    eval_users = user_list[n_users:2 * n_users]

    S_list = []
    X_list = []
    U_list = []
    Qv_list = []
    pass_list = []
    atts_list = []

    for i_users, u in enumerate(train_users, start=1):
        print(i_users)
        S, X, U, Qv, passed = _collect_user_rows(u)
        S_list += S
        X_list += X
        U_list += U
        Qv_list += Qv
        pass_list += passed
        atts_list += [numpy.ones(1) for _ in X]

        # Compare against the real number of training users: there may be
        # fewer than n_users students, and the final partial batch must
        # still be trained on.
        is_last_user = i_users == len(train_users)
        if X_list and (len(X_list) > 10000 or is_last_user):
            zpdp.train(
                (numpy.array(S_list), numpy.array(X_list),
                 numpy.array(U_list)),
                numpy.array(Qv_list),
                numpy.array(pass_list),
                numpy.array(atts_list))
            S_list = []
            X_list = []
            U_list = []
            Qv_list = []
            pass_list = []
            atts_list = []

    # Start evaluation from fresh lists so no training rows can leak in.
    S_list = []
    X_list = []
    U_list = []
    Qv_list = []
    pass_list = []
    for u in eval_users:
        S, X, U, Qv, passed = _collect_user_rows(u)
        X_list += X
        U_list += U
        Qv_list += Qv
        S_list += S
        pass_list += passed

    metrics = zpdp.pass_model.evaluate(
        [numpy.array(S_list), numpy.array(X_list), numpy.array(U_list),
         numpy.array(Qv_list)],
        numpy.array(pass_list))
    print(metrics)
Esempio n. 2
0
def gen_qhist(psi, ts):
    """Return the question history of user ``psi`` up to and including ``ts``.

    The result is a list of ``(question_id, timestamp)`` tuples, one per
    attempt whose timestamp is ``<= ts``, in the order the attempts are
    returned by ``get_attempts_from_db``.
    """
    history = get_attempts_from_db(psi)
    seen = history[history["timestamp"] <= ts]
    return list(zip(seen["question_id"], seen["timestamp"]))
Esempio n. 3
0
def gen_experience(psi, ts_list):
    """Build cumulative "question attempted" vectors for user ``psi``.

    For every timestamp in ``ts_list`` (processed in ascending order) the
    function marks each question attempted at or before that timestamp
    with ``1`` in a vector of length ``len(all_qids)`` and appends a copy
    of the vector to the result. Marks are never cleared, so each vector
    includes everything seen at earlier timestamps.

    Parameters:
        psi: user id passed to ``get_attempts_from_db``.
        ts_list: iterable of timestamps comparable with the
            ``"timestamp"`` column of the attempts frame.

    Returns:
        A list of numpy arrays, one per timestamp, ordered by ascending
        timestamp (not by the input order of ``ts_list``).

    Question ids missing from ``reverse_qid_dict`` are reported on stdout
    and skipped.
    """
    raw_attempts = get_attempts_from_db(psi)
    X_list = []
    X = numpy.zeros(len(all_qids))
    for ts in sorted(ts_list):
        attempts = raw_attempts[raw_attempts["timestamp"] <= ts]
        for qid in attempts["question_id"]:
            try:
                qix = reverse_qid_dict[qid]
            except KeyError:
                # Unknown question id: report it and move on.
                print("UNK Qn ", qid)
                continue
            X[qix] = 1
        X_list.append(numpy.copy(X))
        # Rows up to ts are already folded into X; only later rows remain.
        raw_attempts = raw_attempts[raw_attempts["timestamp"] > ts]
    return X_list
Esempio n. 4
0
def gen_semi_static(psi, dob_cache, ts_list):
    """Build slowly-changing per-user feature rows, one per timestamp.

    For each timestamp in ``ts_list`` (ascending) the row is
    ``[age, days, attempts_per_day, success_ratio]`` where

    * ``age`` is ``(ts - dob_cache[psi]).days / 365.242``, replaced by
      ``16.9`` when it is not a float in ``[10, 100)`` (this includes NaN),
    * ``days`` is the span in days between the first and last attempt at
      or before ``ts``, with a floor of ``1.0`` (also used when there are
      no attempts or the span is NaN),
    * ``attempts_per_day`` is the number of attempts so far over ``days``,
    * ``success_ratio`` is correct attempts over attempts (``0`` if none).

    Parameters:
        psi: user id; key into ``dob_cache`` and argument to
            ``get_attempts_from_db``.
        dob_cache: mapping from user id to date of birth.
        ts_list: iterable of timestamps.

    Returns:
        A list of 4-element numpy arrays ordered by ascending timestamp.

    Raises:
        KeyError: if ``psi`` is not in ``dob_cache`` (only when
            ``ts_list`` is non-empty).
    """
    S_list = []
    raw_attempts = get_attempts_from_db(psi)
    for ts in sorted(ts_list):
        xp_atts = 0
        sx = 0
        days = 1.0
        attempts = raw_attempts[raw_attempts["timestamp"] <= ts]

        dob = dob_cache[psi]
        age = (ts - dob).days / 365.242
        # Written as a positive range test so that NaN (every comparison
        # False) also falls through to the default age.
        if not (isinstance(age, float) and 10 <= age < 100):
            age = 16.9

        if not attempts.empty:
            maxdate = attempts["timestamp"].max()
            mindate = attempts["timestamp"].min()
            days = (maxdate - mindate).days
            # A NaN span would poison the ratios below, so treat it like
            # a sub-day span and use the one-day floor.
            if numpy.isnan(days) or days < 1.0:
                days = 1.0

            xp_atts = attempts.shape[0]
            sx = attempts[attempts["correct"] == True].shape[0]

        S_list.append(numpy.array([
            age,
            days,
            xp_atts / days,
            sx / xp_atts if xp_atts else 0,
        ]))
    return S_list
Esempio n. 5
0
def gen_experience(psi, ts_list):
    """Build cumulative "question attempted" vectors for user ``psi``.

    For every timestamp in ``ts_list`` (processed in ascending order) the
    function marks each question attempted strictly before that timestamp
    with ``1`` in a vector of length ``len(all_qids)`` and appends a copy
    of the vector to the result. Marks are never cleared, so each vector
    includes everything seen at earlier timestamps.

    Parameters:
        psi: user id passed to ``get_attempts_from_db``.
        ts_list: iterable of timestamps comparable with the
            ``"timestamp"`` column of the attempts frame.

    Returns:
        A list of numpy arrays, one per timestamp, ordered by ascending
        timestamp (not by the input order of ``ts_list``).

    Question ids missing from ``reverse_qid_dict`` are reported on stdout
    and skipped.
    """
    raw_attempts = get_attempts_from_db(psi)
    X_list = []
    X = numpy.zeros(len(all_qids))
    for ts in sorted(ts_list):
        attempts = raw_attempts[raw_attempts["timestamp"] < ts]
        for qid in attempts["question_id"]:
            try:
                qix = reverse_qid_dict[qid]
            except KeyError:
                # Unknown question id: report it and move on.
                print("UNK Qn ", qid)
                continue
            X[qix] = 1
        X_list.append(numpy.copy(X))
        # Rows before ts are already folded into X; keep ts and later.
        raw_attempts = raw_attempts[raw_attempts["timestamp"] >= ts]
    return X_list
Esempio n. 6
0
def gen_success(psi, ts_list):
    """Build per-timestamp "success" vectors for user ``psi``.

    Timestamps are processed in ascending order. For each one, the
    attempts strictly before it (and not already consumed by an earlier
    timestamp) are inspected; every question with a correct attempt in
    that window gets the value ``1 / (number of attempts at that question
    in the window)``. Values persist across timestamps until overwritten,
    and a copy of the vector is appended for every timestamp.

    Parameters:
        psi: user id passed to ``get_attempts_from_db``.
        ts_list: iterable of timestamps comparable with the
            ``"timestamp"`` column of the attempts frame.

    Returns:
        A list of numpy arrays of length ``len(all_qids)``, ordered by
        ascending timestamp.

    Question ids missing from ``reverse_qid_dict`` are reported on stdout
    and skipped.
    """
    raw_attempts = get_attempts_from_db(psi)
    U_list = []
    U = numpy.zeros(len(all_qids))
    for ts in sorted(ts_list):
        attempts = raw_attempts[raw_attempts["timestamp"] < ts]
        hits = attempts[attempts["correct"] == True]["question_id"]
        for qid in hits:
            try:
                qix = reverse_qid_dict[qid]
            except KeyError:
                # Unknown question id: report it and move on.
                print("UNK Qn ", qid)
                continue
            # qid comes from `attempts`, so the count is always >= 1.
            attct = len(attempts[attempts["question_id"] == qid])
            U[qix] = 1.0 / attct
        U_list.append(numpy.copy(U))
        # Rows before ts have been consumed; keep ts and later.
        raw_attempts = raw_attempts[raw_attempts["timestamp"] >= ts]
    return U_list
Esempio n. 7
0
def filter_assignments(assignments, book_only):
    """Flag which assignments are usable, in an ``"include"`` column.

    An assignment is kept (``include == True``) when all of the following
    hold:

    * if ``book_only`` is true, every question of its gameboard has an id
      whose part before ``"|"`` starts with ``"ch_"`` or ``"ch-i"``;
    * its group has at least one student;
    * at least one of those students has a non-empty attempt history.

    Parameters:
        assignments: DataFrame with at least ``"group_id"`` and
            ``"gameboard_id"`` columns. It is modified in place (the
            ``"include"`` column is added/overwritten). Index labels are
            assumed to be unique.
        book_only: when true, apply the gameboard question-id filter.

    Returns:
        The same ``assignments`` object. Rows are flagged, not dropped.

    Raises:
        KeyError: if ``book_only`` is true and a gameboard id is missing
            from the map returned by ``make_gb_question_map``.
    """
    assignments["include"] = True
    print(assignments.shape)
    gb_question_map = make_gb_question_map()
    # NOTE(review): the result of get_meta_data() is not used here; the
    # call is kept in case it has side effects this function relies on.
    get_meta_data()

    # Iterate over the actual index labels: .loc is label-based, so using
    # positions 0..n-1 only works for a default RangeIndex.
    for ix in assignments.index:
        include = True
        gr_id = assignments.loc[ix, "group_id"]

        if book_only:
            gb_id = assignments.loc[ix, "gameboard_id"]
            include = all(
                hx.split("|")[0].startswith(("ch_", "ch-i"))
                for hx in gb_question_map[gb_id])

        if include:
            students = get_student_list([gr_id])
            # any() stops querying at the first student with attempts.
            include = (not students.empty) and any(
                not get_attempts_from_db(psi).empty
                for psi in list(students["user_id"]))

        if not include:
            assignments.loc[ix, "include"] = False

    print(assignments.shape)
    return assignments
Esempio n. 8
0
def _ordered_unique(items):
    """Return the distinct elements of ``items`` in first-seen order."""
    seen = set()
    unique = []
    for item in items:
        if item not in seen:
            seen.add(item)
            unique.append(item)
    return unique


def _longest_line(value):
    """Return the length of the longest line of ``str(value)``."""
    return max(len(line) for line in str(value).split('\n'))


def save_class_report_card(ts, aid, gr_id, S, X, U, A, y, y_preds, slist,
                           q_names_df):
    """Write an Excel report card for one class assignment.

    The workbook has a header row, two class-level suggestion rows (the
    label with the highest mean prediction and the label most often
    ranked first per student), then one row per student. It is saved to
    ``./report_cards/<mean months>_<gr_id>_<aid>.xlsx``.

    Parameters:
        ts: cutoff timestamp; only attempts strictly before it are
            reported. Also written to cell B2.
        aid: assignment id, used in the file name.
        gr_id: group id, used in the file name.
        S: per-student sequences; ``s[0]`` is reported as age and
            ``s[1] / 30.44`` as months on the platform.
        X: per-student vectors; non-zero count and sum are reported as
            questions tried, and ``get_top_subjs(x, 10)`` as top topics.
        U: per-student vectors whose sum is reported as successes.
        A, y: iterated in lockstep with the other per-student inputs
            (so they bound the number of rows) but otherwise unused.
        y_preds: 2-D array, one row of scores per student; column ``i``
            corresponds to label ``pid_override[i]``.
        slist: per-student user ids.
        q_names_df: DataFrame indexed by question id with a ``"title"``
            column.

    Raises:
        KeyError: if a recent question id is not in ``q_names_df``.
    """
    # Local import: only needed to support openpyxl versions where
    # Cell.column is an integer rather than a letter.
    from openpyxl.utils import get_column_letter

    N = y_preds.shape[0]
    print(N)
    sum_preds = numpy.sum(y_preds, axis=0)
    print("sum of sums", numpy.sum(sum_preds))
    sum_preds = sum_preds / N
    max_sum_ix = sum_preds.argmax()
    max_sum_prob = sum_preds.max()

    # Each student "votes" for their own top-scoring label.
    max_labs = [pid_override[m] for m in y_preds.argmax(axis=1)]
    vote_ct = Counter(max_labs)

    max_vote_lab = vote_ct.most_common(1)[0][0]
    max_sum_lab = pid_override[max_sum_ix]
    print("max sum lab =", max_sum_lab, max_sum_prob)
    print("votes counted:", vote_ct.most_common(5))
    print("most voted =", max_vote_lab)

    wb = openpyxl.Workbook()
    ws = wb.active

    col_headers = [
        "student", "age", "months_on_isaac", "qns_tried", "successes",
        "prev_assignts", "hexes_attempted", "top_10_topics", "last_10_qns",
        "ISAAC_SUGGESTS", "DIFF: (too easy 1..5 too hard)",
        "TOPIC:(bad 1..3 good)"
    ]
    for c, header in enumerate(col_headers, start=1):
        ws.cell(1, c, header)

    ws.cell(2, 2, ts)

    ws.cell(2, 1, "Classroom sugg'n 1:")
    ws.cell(3, 1, "Classroom sugg'n 2:")
    ws.cell(2, 10, max_sum_lab)
    ws.cell(3, 10, max_vote_lab)

    r = 4
    months_on_list = []
    for s, x, u, _a, _t, psi, maxlab in zip(S, X, U, A, y, slist, max_labs):
        atts = get_attempts_from_db(psi)
        atts = atts[atts["timestamp"] < ts]

        # Distinct page ids (the part of the question id before "|").
        pids = "\n".join(
            _ordered_unique(qid.split("|")[0] for qid in atts["question_id"]))

        top_subjs = get_top_subjs(x, 10)
        if len(top_subjs) > 0:
            big5 = "\n".join(map(str, top_subjs))
        else:
            big5 = "-"

        recent = []
        for qid in list(pandas.unique(atts["question_id"])[-10:]):
            title = q_names_df.loc[qid, "title"]
            if str(title) != "nan":
                recent.append("{} ({})".format(title, qid))
            else:
                recent.append(qid)
        last5 = "\n".join(map(str, recent))

        months_on = s[1] / 30.44
        months_on_list.append(months_on)

        row_values = [
            psi,
            int(10 * s[0]) / 10.0,  # age truncated to one decimal place
            "{:.1f}".format(months_on),
            str(numpy.sum(x > 0)) + " (" + str(numpy.sum(x)) + ")",
            numpy.sum(u),
            # NOTE(review): "prev_assignts" is always written as "-";
            # nothing is derived from A for this column.
            "-",
            pids,
            big5,
            last5,
            maxlab,
        ]
        for c, cv in enumerate(row_values, start=1):
            ws.cell(r, c, cv)
        r += 1

    # Size every column to its longest line of text.
    for col in ws.columns:
        column = col[0].column
        if isinstance(column, int):
            # Newer openpyxl exposes the column as an index, but
            # column_dimensions is keyed by letter.
            column = get_column_letter(column)
        max_length = 0
        for cell in col:
            cell.alignment = Alignment(horizontal="center", vertical="top")
            max_length = max(max_length, _longest_line(cell.value))
        ws.column_dimensions[column].width = max_length * 1.2

    # Size every row to its tallest cell (number of text lines).
    for row in ws.rows:
        rowname = row[0].row
        max_height = 0
        for cell in row:
            max_height = max(max_height, len(str(cell.value).split('\n')))
        # 11.5 is the per-line height used here, presumably in points.
        ws.row_dimensions[rowname].height = max_height * 11.5

    months_av = mean(months_on_list)
    wb.save('./report_cards/{:.1f}_{}_{}.xlsx'.format(months_av, gr_id, aid))
Esempio n. 9
0
                        # print(rd)
                        student_static[psi] = (rd, )
                        l_ts = pandas.to_datetime("1970-01-01 00:00:00")
                        l_hexes = []
                    else:
                        l_ts = last_ts[psi]
                        l_hexes = last_hexes[psi]
                        S, X, U, A = pickle.loads(
                            zlib.decompress(SXUA[psi][l_ts]))
                    # S,X,U,A = copy(S),copy(X),copy(U),copy(A)
                    #make updates

                    # if psi ==118651:
                    #     print("birdskoeping")

                    attempts = get_attempts_from_db(psi)
                    attempts = attempts[attempts["timestamp"] < ts]
                    all_wins = list(
                        attempts[(attempts["correct"] == True)]["question_id"])

                    recent_attempts = attempts[attempts["timestamp"] >= l_ts]
                    # qids = list(set(recent_attempts["question_id"]))
                    qids = list(set(recent_attempts["question_id"]))
                    recent_wins = list(recent_attempts[(
                        recent_attempts["correct"] == True)]["question_id"])

                    for qid in qids:
                        try:
                            qix = all_qids.index(qid)
                            attct = (
                                recent_attempts["question_id"] == qid).sum()