Ejemplo n.º 1
0
    # NOTE(review): fragment of a larger routine — sys, n_users, init_objects,
    # load_new_diffs, load_mcmc_diffs, featureset_to_use and the DW_* constants
    # are defined outside this excerpt.
    # Take the command from argv when provided, otherwise prompt interactively.
    if len(sys.argv) < 2:
        cmd = input("command please?")
    else:
        cmd = sys.argv[1]

    # A command starting with 'g' disables the test phase (presumably a
    # "generate-only" mode — TODO confirm against the full script).
    if cmd.startswith('g'):
        do_test = False
    else:
        do_test = True

    # Experiment configuration flags.
    force_balanced_classes = True
    do_scaling = True
    optimise_predictors = True
    n_classes = 2
    print("n_users",n_users)
    cats, cat_lookup, all_qids, users, _stretches_, levels, cat_ixs = init_objects(n_users, seed=666)

    #users = open("../mcmc/mcmc_uesrs.txt").read().splitlines()

    # Question difficulty data: pass-rate based and MCMC-derived.
    passdiffs, stretches, passquals, all_qids = load_new_diffs()
    mcmcdiffs = load_mcmc_diffs()

    reports =[]
    # Report filename encodes every experiment setting for traceability.
    report_name = "report_DW{}_{}_fb{}_opt{}_scale{}_{}.txt".format(0, n_users, str(1 if force_balanced_classes else 0), ("001" if optimise_predictors else "0"), ("1" if do_scaling else "0"), featureset_to_use)
    if do_test:
        report = open(report_name,"w")
    # Grid search over difficulty-weighting schemes and hyperparameters.
    for w in [DW_NO_WEIGHT, DW_NATTS, DW_LEVEL, DW_PASSRATE, DW_MCMC, DW_STRETCH]:
        for alpha in [1.0, 0.9, 0.6, 0.3, 0.1]:
            for phi_retain in [1.0, 0.75, 0.25, 0.0]:
                print(cat_ixs)
                if do_test:
Ejemplo n.º 2
0
        print(user_summary_df.dtypes)
        user_summary_df.iloc[:, 0:15].hist(bins="auto")
        plt.show()
        exit()

    if cmd.startswith('g'):
        do_test = False
    else:
        do_test = True

    force_balanced_classes = True
    do_scaling = True
    optimise_predictors = True
    n_classes = 2
    print("n_users", n_users)
    cats, cat_lookup, all_qids, users, _stretches_, levels, cat_ixs = init_objects(
        n_users, path="../../../isaac_data_files/", seed=666)

    #users = open("../mcmc/mcmc_uesrs.txt").read().splitlines()

    passdiffs, stretches, passquals, all_qids = load_new_diffs(
        "../../../isaac_data_files/pass_diffs.csv")
    mcmcdf = pd.DataFrame.from_csv(
        "../../../isaac_data_files/mcmc/dir_mcmc_results.csv")

    sprofs = pd.DataFrame.from_csv(
        "../../../isaac_data_files/student_profiling/users_all.csv")
    sprofs = sprofs[sprofs["role"] == "STUDENT"]
    sprofs = sprofs[sprofs["date_of_birth"].notna()]
    sprofs = sprofs[sprofs.index.isin(users)]
    users = sprofs.index
    print(len(users))
Ejemplo n.º 3
0
from sklearn.externals import joblib
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MultiLabelBinarizer

from backfit.BackfitUtils import init_objects
from hwgen.concept_extract import concept_extract, page_to_concept_map
from hwgen.profiler import profile_student
from utils.utils import jaccard_score

# Root directory for all Isaac data files used by this module.
base = "../../../isaac_data_files/"

#need to build a softmax classifier to recommend questions...
#this would have qn output nodes

# n_users = -1 presumably means "load all users" — TODO confirm against init_objects.
n_users = -1
cats, cat_lookup, _, users, diffs, levels, cat_ixs = init_objects(n_users)


def make_gb_question_map():
    """Return a dict mapping gameboard id -> list of question-id strings.

    Reads ``gameboards.txt`` (``~``-separated) from the module-level ``base``
    directory.  The ``questions`` column holds a bracketed, comma-separated
    list such as ``[q1,q2,q3]``.  Rows whose ``questions`` cell is not a
    string (e.g. NaN from missing data) are skipped.
    """
    gbd_df = pandas.read_csv(base + "gameboards.txt", sep="~")
    gb_map = {}  # renamed from `map`, which shadowed the builtin
    for gb_id, item in zip(gbd_df["id"], gbd_df["questions"]):
        # isinstance replaces the unidiomatic `str is not type(item)` check.
        if not isinstance(item, str):
            continue
        # Strip the surrounding brackets, then split on commas.
        gb_map[gb_id] = item[1:-1].split(",")
    return gb_map
Ejemplo n.º 4
0
import random

from matplotlib import pyplot as plt

# INGREDIENTS
# Simulated student
# RL Tutor
# Goal - first to 100 correct questions
from isaac.itemencoding import create_S

# Simulation parameters: stop after `target` correct answers, simulate
# `n_users` students.
target = 1000
n_users = 1000
# random.seed(666)
scores = []

cats, cat_lookup, all_qids, users, diffs, levels, cat_ixs = init_objects(
    n_users, path="../../isaacdata/")
passrates, stretches, passquals, all_qids = load_new_diffs(
    "../../isaacdata/pass_diffs.csv")
# MCMC-derived difficulty results and question-type lookup tables.
mcmcdf = pandas.read_csv("../../isaacdata/mcmc/dir_mcmc_results.csv",
                         header=0,
                         index_col=0)
qtypes = pandas.read_csv("../../isaacdata/atypes.csv",
                         header=None,
                         index_col=0)
# FIX: wrap the pickle files in `with` blocks — the original
# pickle.load(open(...)) form leaked the file handles.
# NOTE(review): pickle is only safe on trusted, locally produced files.
with open("p_LSVC_0.2_0.5.pkl", "rb") as f:
    predictor = pickle.load(f)
with open("qutor_scaler.pkl", "rb") as f:
    scaler = pickle.load(f)
print("loaded data")

# Shuffle question ids so the tutor sees them in random order.
all_qids = list(all_qids)
random.shuffle(all_qids)
Ejemplo n.º 5
0
    # NOTE(review): tail of a function whose def line is outside this excerpt.
    # Drop rows and columns of `arr` whose entries are all zero
    # (i.e. no recorded activity along that axis).
    zrows = numpy.nonzero(arr.sum(axis=1)==0)
    zcols = numpy.nonzero(arr.sum(axis=0)==0)
    arr = numpy.delete(arr, zrows, axis=0)
    arr = numpy.delete(arr, zcols, axis=1)
    return arr

# Flags: rebuild the user-experience matrix from raw attempt logs / show plots.
create_xm = False
plot = True
if __name__ == '__main__':

    base = "../../../isaac_data_files/"

    #build user experience matrix here....
    qmeta = pandas.read_csv(base+"qmeta.csv", header=None)
    n_users=1000
    cats, cat_lookup, all_qids, users, diffs, levels, cat_ixs = init_objects(n_users, path=base)

    # NOTE(review): `levels` returned by init_objects is immediately
    # clobbered here — confirm this is intentional.
    levels = set()
    lev_lookup = {}
    
    if create_xm:
        # users x questions matrix, filled from per-user attempt logs below.
        exp_mx = numpy.zeros(shape = (len(users), len(all_qids)))
        print("Created exp_mx of shape:", exp_mx.shape)
        
        for uix,u in enumerate(users):
            X = numpy.zeros(shape=(1,len(all_qids)))
            uqatts = pandas.read_csv(base+"by_user/{}.txt".format(u), header=None)
            runs = extract_runs_w_timestamp(uqatts)
            
            # NOTE(review): excerpt is truncated here; each run unpacks to
            # (timestamp, question, n_attempts, n_passes).
            for run in runs:
                ts,q,n_atts,n_pass = run