コード例 #1
0
def generate_binwgt_run_files(xfn, yfn, alpha, _w, fade, cats, cat_lookup,
                              all_qids, users, stretches, passrates, passquals,
                              levels, mcmcdiffs, cat_ixs, n_classes):
    """Write binary-weight training files for a sequence model.

    For every question-run of every user, appends one row to the feature
    file *xfn* (the faded experience vector concatenated with a one-hot
    encoding of the next question's category) and one label to *yfn*
    (0, or -1 for a first-attempt pass when n_classes != 2; 1 = failed).

    Parameters
    ----------
    xfn, yfn : str -- output CSV paths for features and labels
    alpha : float -- blend factor for the experience update
    _w : str -- difficulty-weighting mode, passed through to calc_qdiff
    fade : float -- per-run decay applied to the experience vector
    cats, cat_lookup, cat_ixs -- category list, question->category map,
        category->index map
    users -- iterable of user ids; attempts are read from
        ../by_user/<id>.txt
    stretches, passrates, passquals, levels, mcmcdiffs -- per-question
        difficulty statistics (passquals is unused in this variant)
    n_classes : int -- 2 for binary labels, otherwise 3-way (-1/0/1)
    """
    n_features = len(cats)
    print("using n_features=", n_features)

    run_ct = 0
    # Running experience vector, one slot per category.
    # NOTE(review): X is never reset between users in the original code,
    # so experience carries across users -- confirm this is intended.
    X = numpy.zeros(shape=n_features)
    print("Generating files for {} users...".format(len(users)))
    with open(xfn, "w") as X_file, open(yfn, "w") as y_file:
        for u in users:
            print("user =", u)
            # Restored: this line was corrupted in the source; the user's
            # attempt history must be loaded before extracting runs.
            attempts = pandas.read_csv("../by_user/{}.txt".format(u),
                                       header=None)
            runs = extract_runs_w_timestamp(attempts)
            for run in runs:
                run_ct += 1
                ts, q, n_atts, n_pass = run
                qt = q.replace("|", "~")
                lev = levels[qt]
                if lev == 0:
                    continue  # skip unlevelled questions

                # Computed for parity with the weighted variant; not used
                # further in this binary-weight version.
                qdiff = calc_qdiff(qt, passrates, stretches, levels,
                                   mcmcdiffs, mode=_w)

                catix = cat_ixs[cat_lookup[qt]]

                # One-hot category encoding of the upcoming question.
                # NOTE(review): qenc_width is a module-level global --
                # presumably equal to n_features; confirm.
                qenc = numpy.zeros(shape=qenc_width)
                qenc[catix] = 1.0

                X_file.write(",".join(str(x) for x in X) + "," +
                             ",".join(str(e) for e in qenc) + "\n")
                X = X * fade  # decay past experience

                if n_pass > 0:
                    y = 0 if n_classes == 2 else (-1 if n_atts == 1 else 0)
                    # Blend a unit success signal into the category slot.
                    X[catix] = (1.0 - alpha) * X[catix] + alpha * 1.0
                else:
                    y = 1
                y_file.write(str(y) + "\n")

            X_file.flush()
            y_file.flush()
    n_users = len(users)  # was an undefined name in the original
    rpu = run_ct / float(n_users) if n_users else 0.0  # guard empty user list
    print(n_users, "users", run_ct, "runs", rpu, "rpu")
コード例 #2
0
ファイル: IsaacLDAClustering.py プロジェクト: rjm49/isaacdata
        # Fragment of a larger routine (the enclosing def is outside this
        # view): builds a per-user experience matrix exp_mx, one row per
        # user and one column per feature name in exp_cols (exp_cols is
        # defined above, outside this excerpt).
        #exp_cols = [x+"_P" for x in all_qids] 
        #exp_cols = [x+"_X" for x in all_qids] # + [x+"_N" for x in all_qids] + [x+"_P" for x in all_qids] + ["total_qs"]
        
        exp_mx = pandas.DataFrame( index=users, columns=exp_cols )#, columns = questions)
        print("Created exp_mx of shape:", exp_mx.shape)
        
        #cnt_mx = pandas.DataFrame( index=users, columns=list(levels) )#, columns = questions)
        
        #exp_mx[:] = -1.0
        #exp_mx = numpy.zeros((len(users),len(questions)))
        exp_mx.fillna(0.0, inplace=True)  # start every count at zero
        #cnt_mx.fillna(0.0, inplace=True)
        for i,u in enumerate(users):
#             print(u,"...")
            # Load this user's raw attempt log and collapse it into runs.
            uqatts = pandas.read_csv(base+"by_user/{}.txt".format(u), header=None)
            runs = extract_runs_w_timestamp(uqatts)
            
            for run in runs:
                ts,q,n_atts,n_pass= run
                q = q.replace("|","~")
#                 L = lev_lookup[q]
                c = cat_lookup[q]
                # Count one attempt-run against the user's category column.
                exp_mx.loc[u,c+"_X"] += 1.0
#                 exp_mx.loc[u,c+"_N"] += n_atts
#                 exp_mx.loc[u,c+"_P"] += n_pass#/n_atts
#                 exp_mx.loc[u,"total_qs"] += 1.0
#                   if(p>0):
#                     exp_mx.loc[u,c] += 1.0
            print(u,"done")
        exp_mx.fillna(0.0, inplace=True)  # defensive re-fill; likely redundant
        #print(exp_mx.shape)
コード例 #3
0
def generate_run_files(alpha, _featureset_to_use, _w, fade, cats, cat_lookup, all_qids, users, stretches, passrates, passquals, levels, mcmcdiffs, cat_ixs):
    """Write weighted training files for one feature-set configuration.

    For every question-run of every user, appends one row to
    <stem>_X.csv (faded experience vector + difficulty-weighted one-hot
    question encoding) and one label to <stem>_y.csv, where the stem
    encodes the feature set, alpha, fade and the weighting mode *_w*.

    Parameters
    ----------
    alpha : float -- blend factor for the experience update
    _featureset_to_use : str -- name used in the output-file stem
    _w : str -- one of the DW_* weighting-mode constants
    fade : float -- per-run decay applied to the experience vector
    cats, cat_lookup, cat_ixs -- category list and lookup maps
    users -- iterable of user ids; attempts read from ../by_user/<id>.txt
    stretches, passrates, passquals, levels, mcmcdiffs -- per-question
        difficulty statistics (passquals is read but not used below)

    Returns
    -------
    (x_filename, y_filename) : the paths of the two files written.
    """
    stem = _featureset_to_use + "_" + str(alpha) + "_" + str(fade) + "_" + _w
    x_filename = stem + "_X.csv"
    y_filename = stem + "_y.csv"

    n_features = len(cats)
    print("using n_features=", n_features)

    run_ct = 0
    # NOTE(review): as in generate_binwgt_run_files, X is never reset
    # between users -- experience carries across users; confirm intended.
    X = numpy.zeros(shape=n_features)
    print("Generating files for {} users...".format(len(users)))
    with open(x_filename, "w") as X_file, open(y_filename, "w") as y_file:
        for u in users:
            print("user =", u)
            # Restored: this line was corrupted in the source; the user's
            # attempt history must be loaded before extracting runs.
            attempts = pandas.read_csv("../by_user/{}.txt".format(u), header=None)
            runs = extract_runs_w_timestamp(attempts)
            for run in runs:
                run_ct += 1
                ts, q, n_atts, n_pass = run
                qt = q.replace("|", "~")
                lev = levels[qt]
                if lev < 1:
                    continue  # skip unlevelled questions

                catix = cat_ixs[cat_lookup[qt]]

                passrate = passrates[qt]
                qpassqual = passquals[qt]  # currently unused below
                stretch = stretches[qt]
                mcmc = mcmcdiffs.get(qt, 0)

                # Difficulty weight for the upcoming-question encoding.
                # NOTE(review): qenc_width is a module-level global --
                # presumably equal to n_features; confirm.
                qenc = numpy.zeros(shape=qenc_width)
                if _w == DW_NATTS or _w == DW_STRETCH:
                    q_weight = stretch
                elif _w == DW_PASSRATE:
                    q_weight = passrate
                elif _w == DW_LEVEL:
                    q_weight = 1 + lev
                elif _w == DW_MCMC:
                    q_weight = mcmc
                else:
                    q_weight = 1.0
                qenc[catix] = q_weight  # set the next q category and diff

                X_file.write(",".join(str(x) for x in X) + "," +
                             ",".join(str(e) for e in qenc) + "\n")
                X = X * fade  # decay past experience

                # Per-attempt success weight for the experience update.
                if _w == DW_NATTS:
                    a_weight = n_atts
                elif _w == DW_NO_WEIGHT:
                    a_weight = 1.0 / n_atts
                elif _w == DW_PASSRATE:
                    a_weight = passrate / n_atts
                elif _w == DW_STRETCH:
                    a_weight = stretch / n_atts
                elif _w == DW_MCMC:
                    a_weight = mcmc / n_atts
                elif _w == DW_LEVEL:
                    a_weight = (1 + lev) / n_atts
                else:  # DW_BINARY and any unrecognised mode
                    a_weight = 1.0

                if n_pass > 0:
                    # NOTE(review): n_classes is a free name here (it is a
                    # parameter in generate_binwgt_run_files) -- presumably
                    # a module-level global; confirm it exists.
                    y = 0 if n_classes == 2 else (-1 if n_atts == 1 else 0)
                    X[catix] = (1.0 - alpha) * X[catix] + alpha * a_weight
                else:
                    y = 1

                y_file.write(str(y) + "\n")

            X_file.flush()
            y_file.flush()
    n_users = len(users)  # was an undefined name in the original
    rpu = run_ct / float(n_users) if n_users else 0.0  # guard empty user list
    print(n_users, "users", run_ct, "runs", rpu, "rpu")
    return x_filename, y_filename
コード例 #4
0
ファイル: profiler.py プロジェクト: rjm49/isaacdata
def profile_student_irt(u, ass_ts, cats, cat_lookup, cat_ixs, levels, concepts_all):
    """Build a per-concept ability profile for student *u* at *ass_ts*.

    Replays the student's attempt runs (loaded from
    ../../../isaac_data_files/by_user/<u>.txt) in timestamp order up to
    the assignment time, feeding each run into one IRTEngine per related
    concept and one per subject category.

    Returns the list of per-concept theta estimates (indexed by
    concepts_all), or [] if the student has no attempts file.
    """
    base = "../../../isaac_data_files/"
    # Question -> related-concepts lookup table.
    df = pd.read_csv(base + "hwgen1.csv", index_col=0, header=0)
    fname = base + "by_user/{}.txt".format(u)
    try:
        attempts = pd.read_csv(fname, header=None)
    except FileNotFoundError:
        return []  # no attempt history for this student
    runs = extract_runs_w_timestamp(attempts)
    print("num of runs", len(runs))

    run_ct = 0
    irts = {}       # concept -> IRTEngine
    subj_irts = {}  # subject category -> IRTEngine
    cat_levels = [0] * len(cats)
    concept_levels = [0] * 100  # NOTE(review): hard-coded max concept count

    for run in runs:
        run_ct += 1
        ts, q, n_atts, n_pass = run
        ts = pd.to_datetime(ts)
        if ts > ass_ts:
            break  # runs appear chronological; stop at the assignment time

        qt = q.replace("|", "~")
        cat = cat_lookup[qt]
        lev = levels[qt]

        if q not in df.index:
            continue  # question has no concept metadata
        concepts_raw = df.loc[q, "related_concepts"]
        # SECURITY NOTE: eval() on file-sourced text; the CSV is
        # project-generated, but ast.literal_eval would be safer here.
        concepts = eval(concepts_raw) if not pd.isna(concepts_raw) else []
        for c in concepts:
            if c not in irts:
                # NOTE(review): the first sighting of a concept only
                # creates the engine -- this run is NOT fed to update();
                # confirm that is intended.
                print("at run", run_ct, "new irt engine for", u, c)
                irts[c] = IRTEngine()
            else:
                irt = irts[c]
                irt.curr_theta = irt.update(lev, (n_pass > 0))
                irts[c] = irt
                print(u, c, "history =", irt.history)
                print("at run", run_ct, "irt update", u, c, irt.curr_theta)

        if cat not in subj_irts:
            subj_irts[cat] = IRTEngine()
        else:
            subj_irts[cat].curr_theta = subj_irts[cat].update(lev, (n_pass > 0))
            print("at run", run_ct, "irt cat update", u, cat, subj_irts[cat].curr_theta)

        for s in subj_irts:
            # NOTE(review): indexes by the current run's cat rather than s,
            # so every subject's theta lands in one slot; cat_levels is
            # not returned, so behavior is preserved as-is -- confirm.
            catix = cat_ixs[cat]
            theta = subj_irts[s].curr_theta
            print(s, "=", theta)
            cat_levels[catix] = theta
        if irts:
            print("\nConcept level abilities:")
        for c in irts:
            theta = irts[c].curr_theta
            print(c, "=", theta)
            conix = concepts_all.index(c)
            concept_levels[conix] = theta

    # Subject-level thetas (cat_levels) are computed but deliberately not
    # returned; only the concept profile is.
    concatd = concept_levels
    print("Profile for user {}: {}".format(u, concatd))
    if not concatd:
        print("empty")
    return concatd
コード例 #5
0
ファイル: LatextExpGuesser.py プロジェクト: rjm49/isaacdata
    # --- Build per-question prior-experience tallies -------------------
    # (fragment: qmx/sqmx/fqmx, n_qids, users, cats, all_qids, cat_ixs and
    # cat_lookup are defined above, outside this excerpt)
    cqmx = np.zeros(shape=n_qids)   # count of attempts per question
    csqmx = np.zeros(shape=n_qids)  # count of successful attempts
    cfqmx = np.zeros(shape=n_qids)  # count of failed attempts

    # NOTE(review): closed outside this excerpt, presumably.
    usersf = open("direct_mcmc_users.txt", "w")

    eps_cnt = 0
    qenc = np.zeros(shape=len(cats))  # per-user category-experience vector
    for u in users:
        # FIX: qenc is 1-D; the original "qenc[:, :] = 0.0" raises
        # IndexError (too many indices for a 1-D array).
        qenc[:] = 0.0
        xp = 0
        eps_cnt += 1
        print("user =", u)  # restored: line was corrupted in the source
        attempts = pd.read_csv("../by_user/{}.txt".format(u), header=None)
        runs = extract_runs_w_timestamp(attempts)
        for run in runs[0:10]:  # only the user's first 10 runs
            ts, q, n_atts, n_pass = run
            q = q.replace("|", "~")
            qix = all_qids.index(q)
            xp += 1
            # Accumulate the experience the user had *before* this question.
            qmx[qix] += qenc
            cqmx[qix] += 1
            if n_pass > 0:
                sqmx[qix] += qenc
                csqmx[qix] += 1
            else:
                fqmx[qix] += qenc
                cfqmx[qix] += 1
            catix = cat_ixs[cat_lookup[q]]
            qenc[catix] += 1  # then credit this question's category
コード例 #6
0
                # (fragment: the enclosing loops over `target` dates and the
                # definitions of uqatts/start/active_users lie above this view)
                end = dateutil.parser.parse(uqatts.iloc[-1, 0])
                # user counts as "active" if their attempt window spans target
                if start < target <= end:
                    active_users.append(u)
            # One binary experience matrix per target date:
            # rows = active users, cols = all question ids.
            exp_mx_lookup[target] = numpy.zeros(shape=(len(active_users),
                                                       len(all_qids)))
            print("Created exp_mx of shape:", exp_mx_lookup[target].shape)

            for uix, u in enumerate(active_users):
                # NOTE(review): X is allocated but never used below -- dead?
                X = numpy.zeros(shape=(1,
                                       len(all_qids)))  #, dtype=numpy.int32)
                uqatts = pandas.read_csv(filebase + "by_user/{}.txt".format(u),
                                         header=None)
                uqatts[0] = pandas.to_datetime(uqatts[0])
                # keep only attempts made on or before the target date
                uqatts = uqatts[uqatts[0] <= target]
                #                 print("size for {}".format(target), uqatts.size)
                for run in extract_runs_w_timestamp(uqatts):
                    ts, q, n_atts, n_pass = run
                    #                     q = run[0]
                    #                     n_atts = run[1]
                    #                     n_pass = run[2]
                    q = q.replace("|", "~")
                    #                 L = lev_lookup[q]
                    c = cat_lookup[q]
                    #decay the user's previous career
                    exp_mx = exp_mx_lookup[target]
                    #exp_mx[uix] = decay * exp_mx[uix]
                    # mark that this user has attempted this question
                    if (n_atts > 0):
                        qix = q_ixs[q]
                        exp_mx[uix, qix] = 1.0
#                     else:
#                         qix = q_ixs[q]