예제 #1
0
    # Commands beginning with 'g' disable the test phase; any other command
    # enables it.
    do_test = not cmd.startswith('g')

    # Fixed experiment settings for this run.
    force_balanced_classes = True
    do_scaling = True
    optimise_predictors = True
    n_classes = 2
    print("n_users", n_users)
    cats, cat_lookup, all_qids, users, _stretches_, levels, cat_ixs = init_objects(
        n_users, path="../../../isaac_data_files/", seed=666)

    #users = open("../mcmc/mcmc_uesrs.txt").read().splitlines()

    # NOTE: all_qids is rebound here, replacing the value returned by
    # init_objects above.
    passdiffs, stretches, passquals, all_qids = load_new_diffs(
        "../../../isaac_data_files/pass_diffs.csv")

    # DataFrame.from_csv was deprecated in pandas 0.21 and removed in 1.0.
    # read_csv with index_col=0 and parse_dates=True is its documented
    # equivalent, so the loaded frames are unchanged.
    mcmcdf = pd.read_csv(
        "../../../isaac_data_files/mcmc/dir_mcmc_results.csv",
        index_col=0, parse_dates=True)

    sprofs = pd.read_csv(
        "../../../isaac_data_files/student_profiling/users_all.csv",
        index_col=0, parse_dates=True)
    # Keep only rows with role "STUDENT", a non-null date_of_birth, and an
    # index value present in the users returned by init_objects.
    sprofs = sprofs[sprofs["role"] == "STUDENT"]
    sprofs = sprofs[sprofs["date_of_birth"].notna()]
    sprofs = sprofs[sprofs.index.isin(users)]
    # From here on, users is the index of the filtered profile frame.
    users = sprofs.index
    print(len(users))

    # adf = load_atypes("../../isaacdata/new_atypes.csv")
    # passdiffs = adf.loc[:,"med_passrate"]
    # stretches = adf.loc[:,"med_n_pass"] / adf.loc[:,"med_n_atts"]
예제 #2
0
    # Commands beginning with 'g' disable the test phase; any other command
    # enables it.
    if cmd.startswith('g'):
        do_test = False
    else:
        do_test = True

    # Fixed experiment settings for this run.
    force_balanced_classes = True
    do_scaling = True
    optimise_predictors = True
    n_classes = 2  # NOTE(review): not referenced again in the visible excerpt
    print("n_users",n_users)
    cats, cat_lookup, all_qids, users, _stretches_, levels, cat_ixs = init_objects(n_users, seed=666)

    #users = open("../mcmc/mcmc_uesrs.txt").read().splitlines()

    # NOTE: all_qids is rebound here, replacing the value returned by
    # init_objects above.
    passdiffs, stretches, passquals, all_qids = load_new_diffs()
    mcmcdiffs = load_mcmc_diffs()

    reports =[]
    # The report file name encodes the run settings: a literal 0 after "DW",
    # the user count, the class-balancing flag, the predictor-optimisation
    # flag ("001" when enabled), the scaling flag and the feature set name.
    report_name = "report_DW{}_{}_fb{}_opt{}_scale{}_{}.txt".format(0, n_users, str(1 if force_balanced_classes else 0), ("001" if optimise_predictors else "0"), ("1" if do_scaling else "0"), featureset_to_use)
    # The report file is only opened (and `report` only bound) when testing.
    # NOTE(review): no matching close() is visible in this excerpt -- confirm
    # the handle is closed further down.
    if do_test:
        report = open(report_name,"w")
    # Exhaustive grid over w, alpha and phi_retain.
    # NOTE(review): the DW_* constants are presumably weighting schemes --
    # confirm against their definitions.
    for w in [DW_NO_WEIGHT, DW_NATTS, DW_LEVEL, DW_PASSRATE, DW_MCMC, DW_STRETCH]:
        for alpha in [1.0, 0.9, 0.6, 0.3, 0.1]:
            for phi_retain in [1.0, 0.75, 0.25, 0.0]:
                # Printed on every grid point; looks like debug output.
                print(cat_ixs)
                if do_test:
                    print("testing")
                    # CSV file names derived from the current grid point;
                    # both are handed to train_and_test below.
                    xfn = "F33_{}_{}_{}_X.csv".format(str(alpha), str(phi_retain), w)
                    yfn = "F33_{}_{}_{}_y.csv".format(str(alpha), str(phi_retain), w)
                    X_train, X_test, y_pred_tr, y_pred, y_true, scaler = train_and_test(alpha, predictors, predictor_params, xfn, yfn, n_users, percTest, featureset_to_use, w, phi_retain, force_balanced_classes, do_scaling, optimise_predictors, report=report)
예제 #3
0
from matplotlib import pyplot as plt

# INGREDIENTS
# Simulated student
# RL Tutor
# Goal - first to 100 correct questions
from isaac.itemencoding import create_S

# NOTE(review): the header comment mentions 100 correct questions while
# target is 1000 -- confirm which value is intended.
target = 1000
n_users = 1000
# random.seed(666)
scores = []

cats, cat_lookup, all_qids, users, diffs, levels, cat_ixs = init_objects(
    n_users, path="../../isaacdata/")
# NOTE: all_qids is rebound here, replacing the value returned by
# init_objects above.
passrates, stretches, passquals, all_qids = load_new_diffs(
    "../../isaacdata/pass_diffs.csv")
mcmcdf = pandas.read_csv("../../isaacdata/mcmc/dir_mcmc_results.csv",
                         header=0,
                         index_col=0)
qtypes = pandas.read_csv("../../isaacdata/atypes.csv",
                         header=None,
                         index_col=0)

# Context managers close the pickle files as soon as they are read, rather
# than leaving the handles open until garbage collection.
# NOTE(review): pickle.load can execute arbitrary code; these files are
# assumed to be locally produced artifacts -- confirm their provenance.
with open("p_LSVC_0.2_0.5.pkl", "rb") as predictor_file:
    predictor = pickle.load(predictor_file)
with open("qutor_scaler.pkl", "rb") as scaler_file:
    scaler = pickle.load(scaler_file)
print("loaded data")

# Shuffle the question ids in place (unseeded, since the seed call above is
# commented out) and take the first 25 as the action set.
all_qids = list(all_qids)
random.shuffle(all_qids)

actions = tuple(all_qids[:25])
# qutor = Qutor(alpha=0.1, gamma=1.0, eps=1000, actions=actions)