def generate_binwgt_run_files(xfn, yfn, alpha, _w, fade, cats, cat_lookup, all_qids, users,
                              stretches, passrates, passquals, levels, mcmcdiffs, cat_ixs,
                              n_classes):
    """Write one (X, y) CSV row pair per question run, using binary category weights.

    Each X row is the user's exponentially-faded per-category experience vector
    concatenated with a one-hot encoding of the upcoming question's category;
    y is the pass/fail label (1 = fail; 0/-1 = pass, see below).

    NOTE(review): relies on a module-level `qenc_width` and on per-user attempt
    logs at ../by_user/<user>.txt — confirm both exist before running.
    """
    with open(xfn, "w") as X_file, open(yfn, "w") as y_file:
        n_features = len(cats)
        print("using n_features=", n_features)
        run_ct = 0
        # Running feature vector. It is deliberately NOT reset between users,
        # matching the original behaviour — TODO confirm this is intended.
        X = numpy.zeros(shape=n_features)
        print("Generating files for {} users...".format(len(users)))
        for u in users:
            # NOTE(review): the source was corrupted here ('"user = "******"...');
            # reconstructed from the parallel generate_run_files() implementation.
            print("user = ", u)
            attempts = pandas.read_csv("../by_user/{}.txt".format(u), header=None)
            runs = extract_runs_w_timestamp(attempts)
            for run in runs:
                run_ct += 1
                ts, q, n_atts, n_pass = run
                qt = q.replace("|", "~")
                lev = levels[qt]
                if lev == 0:
                    continue  # skip level-0 questions entirely
                # qdiff is computed but unused here; kept in case calc_qdiff
                # has side effects — TODO confirm it can be dropped.
                qdiff = calc_qdiff(qt, passrates, stretches, levels, mcmcdiffs, mode=_w)
                catix = cat_ixs[cat_lookup[qt]]
                qenc = numpy.zeros(shape=qenc_width)
                qenc[catix] = 1.0  # one-hot category of the upcoming question
                X_file.write(",".join(str(x) for x in X) + "," +
                             ",".join(str(e) for e in qenc) + "\n")
                X = X * fade  # decay previous experience before recording the outcome
                if n_pass > 0:
                    if n_classes == 2:
                        y = 0
                    else:
                        # 3-class mode distinguishes first-attempt passes (-1)
                        y = (-1 if n_atts == 1 else 0)
                    X[catix] = (1.0 - alpha) * X[catix] + alpha * 1.0
                else:
                    y = 1
                y_file.write(str(y) + "\n")
        X_file.flush()
        y_file.flush()
    # BUG FIX: the original printed the undefined name `n_users` (NameError).
    n_users = len(users)
    print(n_users, "users", run_ct, "runs", run_ct / float(n_users), "rpu")
# Build a per-user experience matrix: one row per user, counting attempt-runs
# in each "<name>_X" column.
#
# BUG FIX: both definitions of `exp_cols` were commented out in the original,
# leaving it undefined (NameError on the DataFrame constructor). The "_X"
# variant is restored, since the loop below writes to "<cat>_X" columns.
# NOTE(review): columns are derived from all_qids but indexed by category
# (cat_lookup[q]) — confirm cat_lookup values are a subset of all_qids.
exp_cols = [x + "_X" for x in all_qids]
exp_mx = pandas.DataFrame(index=users, columns=exp_cols)
print("Created exp_mx of shape:", exp_mx.shape)
exp_mx.fillna(0.0, inplace=True)
for i, u in enumerate(users):
    uqatts = pandas.read_csv(base + "by_user/{}.txt".format(u), header=None)
    runs = extract_runs_w_timestamp(uqatts)
    for run in runs:
        ts, q, n_atts, n_pass = run
        q = q.replace("|", "~")
        c = cat_lookup[q]
        # Count one attempt-run against this question's category column.
        exp_mx.loc[u, c + "_X"] += 1.0
    print(u, "done")
exp_mx.fillna(0.0, inplace=True)
def generate_run_files(alpha, _featureset_to_use, _w, fade, cats, cat_lookup, all_qids, users,
                       stretches, passrates, passquals, levels, mcmcdiffs, cat_ixs,
                       n_classes=2):
    """Write (X, y) CSV training files for one featureset/weighting configuration.

    Each X row is the user's exponentially-faded per-category experience vector
    concatenated with a weighted one-hot encoding of the upcoming question's
    category; y is the pass/fail label. Returns (x_filename, y_filename).

    BUG FIX: `n_classes` was an undefined name in the original body; it is now
    a parameter (defaulting to binary), matching the sibling
    generate_binwgt_run_files() which already takes it explicitly.
    NOTE(review): relies on module-level `qenc_width` and the DW_* mode
    constants — confirm they are defined in this module.
    """
    stem = _featureset_to_use + "_" + str(alpha) + "_" + str(fade) + "_" + _w
    x_filename = stem + "_X.csv"
    y_filename = stem + "_y.csv"
    n_features = len(cats)
    print("using n_features=", n_features)
    run_ct = 0
    # Running feature vector, NOT reset between users (matches original) — TODO confirm.
    X = numpy.zeros(shape=n_features)
    print("Generating files for {} users...".format(len(users)))
    with open(x_filename, "w") as X_file, open(y_filename, "w") as y_file:
        for u in users:
            # NOTE(review): source corrupted here ('"user = "******"...'); reconstructed.
            print("user = ", u)
            attempts = pandas.read_csv("../by_user/{}.txt".format(u), header=None)
            runs = extract_runs_w_timestamp(attempts)
            for run in runs:
                run_ct += 1
                ts, q, n_atts, n_pass = run
                qt = q.replace("|", "~")
                lev = levels[qt]
                if lev < 1:
                    continue  # skip unlevelled questions
                catix = cat_ixs[cat_lookup[qt]]
                passrate = passrates[qt]
                qpassqual = passquals[qt]
                stretch = stretches[qt]
                mcmc = mcmcdiffs[qt] if qt in mcmcdiffs else 0
                qenc = numpy.zeros(shape=qenc_width)
                # Question weight: difficulty of the upcoming question under mode _w.
                if _w == DW_NATTS or _w == DW_STRETCH:
                    q_weight = stretch
                elif _w == DW_PASSRATE:
                    q_weight = passrate
                elif _w == DW_LEVEL:
                    q_weight = 1 + lev
                elif _w == DW_MCMC:
                    q_weight = mcmc
                else:
                    q_weight = 1.0
                qenc[catix] = q_weight  # set the next q category and diff
                X_file.write(",".join(str(x) for x in X) + "," +
                             ",".join(str(e) for e in qenc) + "\n")
                X = X * fade  # decay previous experience before recording the outcome
                # Answer weight: how much credit a pass earns under mode _w.
                if _w == DW_BINARY:
                    a_weight = 1.0
                elif _w == DW_NATTS:
                    a_weight = n_atts
                elif _w == DW_NO_WEIGHT:
                    a_weight = 1.0 / n_atts
                elif _w == DW_PASSRATE:
                    a_weight = passrate / n_atts
                elif _w == DW_STRETCH:
                    a_weight = stretch / n_atts
                elif _w == DW_MCMC:
                    a_weight = mcmc / n_atts
                elif _w == DW_LEVEL:
                    a_weight = (1 + lev) / n_atts
                else:
                    a_weight = 1.0
                if n_pass > 0:
                    if n_classes == 2:
                        y = 0
                    else:
                        # 3-class mode distinguishes first-attempt passes (-1)
                        y = (-1 if n_atts == 1 else 0)
                    X[catix] = (1.0 - alpha) * X[catix] + alpha * a_weight
                else:
                    y = 1
                y_file.write(str(y) + "\n")
        X_file.flush()
        y_file.flush()
    # BUG FIX: the original printed the undefined name `n_users` (NameError).
    n_users = len(users)
    print(n_users, "users", run_ct, "runs", run_ct / float(n_users), "rpu")
    return x_filename, y_filename
def profile_student_irt(u, ass_ts, cats, cat_lookup, cat_ixs, levels, concepts_all):
    """Build a per-concept IRT ability profile for user `u` from runs before `ass_ts`.

    Returns a list of ability estimates positioned by index in `concepts_all`
    (zeros where there is no evidence), or [] when the user's attempt file is
    missing.  Subject-level (category) thetas are computed and printed but are
    not part of the returned profile.
    """
    base = "../../../isaac_data_files/"
    df = pd.read_csv(base + "hwgen1.csv", index_col=0, header=0)
    fname = base + "by_user/{}.txt".format(u)
    try:
        attempts = pd.read_csv(fname, header=None)
    except FileNotFoundError:
        return []
    runs = extract_runs_w_timestamp(attempts)
    print("num of runs", len(runs))
    run_ct = 0
    irts = {}       # concept -> IRTEngine
    subj_irts = {}  # category -> IRTEngine
    cat_levels = [0] * len(cats)
    # BUG FIX: the original used a fixed [0]*100, which raised IndexError when
    # len(concepts_all) > 100. Sizing to max(...) keeps the old length (and the
    # old return shape) for existing data while supporting larger concept sets.
    concept_levels = [0] * max(100, len(concepts_all))
    for run in runs:
        run_ct += 1
        ts, q, n_atts, n_pass = run
        ts = pd.to_datetime(ts)
        if ts > ass_ts:
            break  # runs are assumed chronological — stop at the assessment time
        qt = q.replace("|", "~")
        cat = cat_lookup[qt]
        lev = levels[qt]
        if q not in df.index:
            continue  # no metadata for this question
        concepts_raw = df.loc[q, "related_concepts"]
        # SECURITY(review): eval() on CSV contents executes arbitrary code if the
        # file is untrusted — consider ast.literal_eval. Left as-is to preserve
        # behaviour on the project's own data files.
        concepts = eval(concepts_raw) if not pd.isna(concepts_raw) else []
        for c in concepts:
            if c not in irts:
                # First sighting of this concept: create the engine. The first
                # observation is deliberately not fed to it (matches original) —
                # TODO confirm that is intended.
                print("at run", run_ct, "new irt engine for", u, c)
                irts[c] = IRTEngine()
            else:
                irt = irts[c]
                irt.curr_theta = irt.update(lev, (n_pass > 0))
                irts[c] = irt
                print(u, c, "history =", irt.history)
                print("at run", run_ct, "irt update", u, c, irt.curr_theta)
        if cat not in subj_irts:
            subj_irts[cat] = IRTEngine()
        else:
            subj_irts[cat].curr_theta = subj_irts[cat].update(lev, (n_pass > 0))
            print("at run", run_ct, "irt cat update", u, cat, subj_irts[cat].curr_theta)
    for s in subj_irts:
        # BUG FIX: the original looked up cat_ixs[cat] — the category of the
        # LAST run processed — for every subject, so all thetas were written to
        # one slot. Index by the subject being iterated instead.
        catix = cat_ixs[s]
        theta = subj_irts[s].curr_theta
        print(s, "=", theta)
        cat_levels[catix] = theta
    if irts:
        print("\nConcept level abilities:")
        for c in irts:
            theta = irts[c].curr_theta
            print(c, "=", theta)
            conix = concepts_all.index(c)
            concept_levels[conix] = theta
    # Only the concept-level profile is returned (cat_levels intentionally unused
    # in the result, matching the original's commented-out concatenation).
    concatd = concept_levels
    print("Profile for user {}: {}".format(u, concatd))
    if concatd == []:
        print("empty")
    return concatd
# Accumulate, per question, the category-experience vector each student had
# just before attempting it, split into overall / passed / failed tallies.
# NOTE(review): qmx, sqmx and fqmx are accumulated here but defined elsewhere
# in the file — confirm they are zero-initialised before this section runs.
cqmx = np.zeros(shape=n_qids)   # attempt-run counts per question
csqmx = np.zeros(shape=n_qids)  # passed-run counts per question
cfqmx = np.zeros(shape=n_qids)  # failed-run counts per question
# NOTE(review): usersf is opened here but not written in this section; it is
# presumably used (and closed) further down the file — verify.
usersf = open("direct_mcmc_users.txt", "w")
eps_cnt = 0
qenc = np.zeros(shape=len(cats))  # running per-category experience for one user
for u in users:
    # BUG FIX: qenc is 1-D, so the original reset `qenc[:, :] = 0.0` raised
    # IndexError (too many indices). A 1-D slice assignment is the correct reset.
    qenc[:] = 0.0
    xp = 0
    eps_cnt += 1
    # NOTE(review): source corrupted here ('"user = "******"\n"'); reconstructed.
    print("user = ", u, "\n")
    attempts = pd.read_csv("../by_user/{}.txt".format(u), header=None)
    runs = extract_runs_w_timestamp(attempts)
    for run in runs[0:10]:  # only each user's first 10 runs — TODO confirm intended
        ts, q, n_atts, n_pass = run
        q = q.replace("|", "~")
        qix = all_qids.index(q)
        xp += 1
        qmx[qix] += qenc   # experience the student brought to this question
        cqmx[qix] += 1
        if n_pass > 0:
            sqmx[qix] += qenc
            csqmx[qix] += 1
        else:
            fqmx[qix] += qenc
            cfqmx[qix] += 1
        # Record this attempt in the running experience vector AFTER tallying,
        # so qenc always reflects experience prior to the current question.
        catix = cat_ixs[cat_lookup[q]]
        qenc[catix] += 1
end = dateutil.parser.parse(uqatts.iloc[-1, 0]) if start < target <= end: active_users.append(u) exp_mx_lookup[target] = numpy.zeros(shape=(len(active_users), len(all_qids))) print("Created exp_mx of shape:", exp_mx_lookup[target].shape) for uix, u in enumerate(active_users): X = numpy.zeros(shape=(1, len(all_qids))) #, dtype=numpy.int32) uqatts = pandas.read_csv(filebase + "by_user/{}.txt".format(u), header=None) uqatts[0] = pandas.to_datetime(uqatts[0]) uqatts = uqatts[uqatts[0] <= target] # print("size for {}".format(target), uqatts.size) for run in extract_runs_w_timestamp(uqatts): ts, q, n_atts, n_pass = run # q = run[0] # n_atts = run[1] # n_pass = run[2] q = q.replace("|", "~") # L = lev_lookup[q] c = cat_lookup[q] #decay the user's previous career exp_mx = exp_mx_lookup[target] #exp_mx[uix] = decay * exp_mx[uix] if (n_atts > 0): qix = q_ixs[q] exp_mx[uix, qix] = 1.0 # else: # qix = q_ixs[q]