# NOTE(review): this chunk's newlines were lost in extraction -- many statements
# are collapsed onto one physical line and will not parse as-is. It is left
# byte-identical because the chunk ends with a dangling `if do_test:` whose body
# is outside this view, so the original nesting of the trailing loop/if
# structure cannot be recovered safely from here.
# Apparent intent (to be confirmed against the original file): read a command
# from argv or stdin, derive do_test from it, set experiment flags, load the
# Isaac question/user metadata via init_objects / load_new_diffs /
# load_mcmc_diffs, build a report filename from the flags, then sweep a grid of
# (difficulty-weighting scheme, alpha, phi_retain) settings.
# Also note: because everything sits on one physical line, the `#users = ...`
# comment in the middle comments out everything after it -- further evidence
# the line breaks were lost.
if len(sys.argv) < 2: cmd = input("command please?") else: cmd = sys.argv[1] if cmd.startswith('g'): do_test = False else: do_test = True force_balanced_classes = True do_scaling = True optimise_predictors = True n_classes = 2 print("n_users",n_users) cats, cat_lookup, all_qids, users, _stretches_, levels, cat_ixs = init_objects(n_users, seed=666) #users = open("../mcmc/mcmc_uesrs.txt").read().splitlines() passdiffs, stretches, passquals, all_qids = load_new_diffs() mcmcdiffs = load_mcmc_diffs() reports =[] report_name = "report_DW{}_{}_fb{}_opt{}_scale{}_{}.txt".format(0, n_users, str(1 if force_balanced_classes else 0), ("001" if optimise_predictors else "0"), ("1" if do_scaling else "0"), featureset_to_use) if do_test: report = open(report_name,"w") for w in [DW_NO_WEIGHT, DW_NATTS, DW_LEVEL, DW_PASSRATE, DW_MCMC, DW_STRETCH]: for alpha in [1.0, 0.9, 0.6, 0.3, 0.1]: for phi_retain in [1.0, 0.75, 0.25, 0.0]: print(cat_ixs) if do_test:
# NOTE(review): newlines lost in extraction -- statements collapsed onto one
# physical line; will not parse as-is. Left byte-identical because the leading
# debug statements (dtype dump, histogram, exit()) may belong inside the
# preceding chunk's dangling `if do_test:` -- the original indentation cannot
# be recovered from this view, so any re-nesting here would be a guess.
# Apparent intent: debug-print/plot user_summary_df then exit; otherwise set
# experiment flags, load Isaac metadata and pass-diff data, then load and
# filter student profiles down to STUDENT-role users with a date of birth who
# also appear in `users`.
# Also note: pd.DataFrame.from_csv was deprecated in pandas 0.21 and removed
# in 1.0 -- the equivalent is pd.read_csv(path, index_col=0, parse_dates=True).
print(user_summary_df.dtypes) user_summary_df.iloc[:, 0:15].hist(bins="auto") plt.show() exit() if cmd.startswith('g'): do_test = False else: do_test = True force_balanced_classes = True do_scaling = True optimise_predictors = True n_classes = 2 print("n_users", n_users) cats, cat_lookup, all_qids, users, _stretches_, levels, cat_ixs = init_objects( n_users, path="../../../isaac_data_files/", seed=666) #users = open("../mcmc/mcmc_uesrs.txt").read().splitlines() passdiffs, stretches, passquals, all_qids = load_new_diffs( "../../../isaac_data_files/pass_diffs.csv") mcmcdf = pd.DataFrame.from_csv( "../../../isaac_data_files/mcmc/dir_mcmc_results.csv") sprofs = pd.DataFrame.from_csv( "../../../isaac_data_files/student_profiling/users_all.csv") sprofs = sprofs[sprofs["role"] == "STUDENT"] sprofs = sprofs[sprofs["date_of_birth"].notna()] sprofs = sprofs[sprofs.index.isin(users)] users = sprofs.index print(len(users))
from sklearn.externals import joblib  # NOTE(review): removed in sklearn 0.23 -- prefer `import joblib`
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MultiLabelBinarizer

from backfit.BackfitUtils import init_objects
from hwgen.concept_extract import concept_extract, page_to_concept_map
from hwgen.profiler import profile_student
from utils.utils import jaccard_score

# Root directory of the Isaac data files used throughout this script.
base = "../../../isaac_data_files/"

# Need to build a softmax classifier to recommend questions...
# this would have qn output nodes.
n_users = -1  # presumably "load all users" -- verify against init_objects
cats, cat_lookup, _, users, diffs, levels, cat_ixs = init_objects(n_users)


def make_gb_question_map():
    """Build a mapping from gameboard id to its list of question ids.

    Reads ``gameboards.txt`` (a ``~``-separated file with ``id`` and
    ``questions`` columns) and parses each ``questions`` cell, which has the
    form ``"[q1,q2,...]"``, into a list of question-id strings.

    Returns:
        dict: gameboard id -> list of question-id strings.
    """
    gbd_df = pandas.read_csv(base + "gameboards.txt", sep="~")
    gb_map = {}  # renamed from `map` to avoid shadowing the builtin
    for gb_id, item in zip(gbd_df["id"], gbd_df["questions"]):
        if not isinstance(item, str):
            continue  # skip NaN / non-string cells (gameboard has no questions)
        item = item[1:-1]  # strip the surrounding "[" and "]"
        gb_map[gb_id] = item.split(",")
    return gb_map
import random

from matplotlib import pyplot as plt

from isaac.itemencoding import create_S

# INGREDIENTS
# - Simulated student
# - RL Tutor
# Goal - first to 100 correct questions
# NOTE(review): the comment above says 100 but `target` is 1000 -- confirm
# which is intended.

target = 1000   # score at which a simulated run is considered finished
n_users = 1000  # how many users' data to load

# random.seed(666)

scores = []

# Load the static Isaac data: category metadata, question ids, user ids and
# difficulty/level lookups (see init_objects for the exact contents).
cats, cat_lookup, all_qids, users, diffs, levels, cat_ixs = init_objects(
    n_users, path="../../isaacdata/")
passrates, stretches, passquals, all_qids = load_new_diffs(
    "../../isaacdata/pass_diffs.csv")
mcmcdf = pandas.read_csv("../../isaacdata/mcmc/dir_mcmc_results.csv",
                         header=0, index_col=0)
qtypes = pandas.read_csv("../../isaacdata/atypes.csv", header=None, index_col=0)

# Load the pre-trained predictor and its feature scaler.
# NOTE(review): pickle is only safe on trusted input -- these are
# project-generated model artefacts, so that holds here.
# Fixed: use `with` so the file handles are closed deterministically
# (the originals left them to the garbage collector).
with open("p_LSVC_0.2_0.5.pkl", "rb") as f:
    predictor = pickle.load(f)
with open("qutor_scaler.pkl", "rb") as f:
    scaler = pickle.load(f)
print("loaded data")

# Materialise and shuffle the question ids so each run sees a fresh order.
all_qids = list(all_qids)
random.shuffle(all_qids)
# NOTE(review): newlines lost in extraction -- statements collapsed onto one
# physical line; will not parse as-is. Left byte-identical because this chunk
# is incomplete at BOTH edges: it opens mid-function (the `def` owning
# `return arr` is outside this view -- the visible part drops all-zero rows
# and columns from `arr`) and it closes mid-loop (the body following
# `ts,q,n_atts,n_pass = run` is outside this view), so the original
# indentation/nesting cannot be reconstructed safely from here.
# Apparent intent of the __main__ section: read question metadata, initialise
# Isaac lookups, and (when create_xm is True) build a users x questions
# experience matrix by replaying each user's attempt runs from
# by_user/<id>.txt via extract_runs_w_timestamp.
zrows = numpy.nonzero(arr.sum(axis=1)==0) zcols = numpy.nonzero(arr.sum(axis=0)==0) arr = numpy.delete(arr, zrows, axis=0) arr = numpy.delete(arr, zcols, axis=1) return arr create_xm = False plot = True if __name__ == '__main__': base = "../../../isaac_data_files/" #build user experience matrix here.... qmeta = pandas.read_csv(base+"qmeta.csv", header=None) n_users=1000 cats, cat_lookup, all_qids, users, diffs, levels, cat_ixs = init_objects(n_users, path=base) levels = set() lev_lookup = {} if create_xm: exp_mx = numpy.zeros(shape = (len(users), len(all_qids))) print("Created exp_mx of shape:", exp_mx.shape) for uix,u in enumerate(users): X = numpy.zeros(shape=(1,len(all_qids))) uqatts = pandas.read_csv(base+"by_user/{}.txt".format(u), header=None) runs = extract_runs_w_timestamp(uqatts) for run in runs: ts,q,n_atts,n_pass = run