def load(ql, f_name="./res/leerpaden_app.xlsx", id_="simone"):
    """
    Load the exercise data together with its derived data sets.

    Unless the loader reports that it quick-loaded the data, the raw data
    is reduced to the relevant columns, sorted, filtered, annotated by a
    ``PhaseFinder`` and stored again through ``loader.quick_save``.

    Parameters
    ----------
    ql:
        Forwarded to ``DataLoader.load`` as ``quick_loading``.
    f_name: str
        Path handed to the ``DataLoader``.
    id_: str
        Identifier selecting the column set, the sorting and the phase
        finding routine that are applied.

    Returns
    -------
    tuple
        ``(data, first_att_data, transfer_data, log_data)``; ``log_data``
        is ``None`` when ``id_`` is ``"test"``.
    """
    attempt_keys = ['UserId', 'ExerciseId', 'LOID']
    core_columns = [
        'DateTime', 'UserId', 'ExerciseId', 'LOID', 'Correct',
        'AbilityAfterAnswer'
    ]
    lesson_columns = core_columns + ['Effort', 'Lesson', 'LessonProgress']

    print("Loading data")
    loader = DataLoader(f_name=f_name, s_name="Blad1")
    data, transfer_data = loader.load(quick_loading=ql)

    # The log is loaded for every identifier except "test".
    log_data = loader.load_log() if id_ not in ("test",) else None

    # NOTE(review): the block below is assumed to run only when the data
    # was not quick-loaded, up to and including both quick_save calls --
    # confirm against the intended nesting.
    if loader.quick_loaded is False:
        print("Organizing data")
        # NOTE: 'DateTime' is taken as-is; building it from 'SubmitDate'
        # and 'Time' via loader.combine_date_time is currently disabled.
        wants_lesson_info = id_ in (
            "kb_all", "kb_all_attempts_curve", "kb_smoothed_curves", "jm"
        )
        if wants_lesson_info:
            data = data[lesson_columns]
        else:
            data = data[core_columns]

        print("Preprocessing data")
        if id_ in ("kb", "kb_all"):
            # These identifiers keep the row order of the loaded data.
            ordered = data
        elif "LessonProgress" in data.columns:
            ordered = loader.sort_data_by(
                data, ["DateTime", "LessonProgress"])
        else:
            ordered = loader.sort_data_by(data, "DateTime")

        transfer_data = loader.first_attempts_only(
            list(attempt_keys), df=transfer_data, copy_df=False)
        data = loader.filter(filters, df=ordered)

        if id_ in ("karlijn_en_babette", "kb", "kb_all", "test", "jm"):
            data = PhaseFinder().find_gynzy_phases(data, id_)
        elif id_ in ("kb_all_attempts_curve", "kb_smoothed_curves"):
            data = PhaseFinder().find_gynzy_phases_with_lesson_info(
                data, id_)
        else:
            data = PhaseFinder().find_phases(data)
            # NOTE(review): `correct` is assumed to belong to this branch
            # only -- confirm.
            data = correct(data)

        loader.quick_save(transfer_data, f_name="quicktransfer.pkl")
        loader.quick_save(data)

    first_att_data = loader.first_attempts_only(list(attempt_keys), df=data)
    return data, first_att_data, transfer_data, log_data
def load_data(data_loc):
    """
    Load the data from an external excel resource.

    The data is always requested with quick loading enabled, reduced to
    the relevant columns, sorted and filtered. When the loader reports
    that it did not quick-load, phases are added through
    ``PhaseFinder.find_gynzy_phases_with_lesson_info`` and the result is
    stored with ``loader.quick_save``.

    Parameters
    ----------
    data_loc: str
        Path to the data.

    Returns
    -------
    pd.DataFrame
        The selected, sorted and filtered data, including the phase
        information.
    """
    loader = DataLoader(f_name=data_loc, s_name="Blad1")
    frame, _ = loader.load(quick_loading=True)

    # Keep the 'phase' column only when the loaded data already has one.
    wanted = [
        'DateTime', 'UserId', 'ExerciseId', 'LOID', 'Correct',
        'AbilityAfterAnswer', 'Effort', 'Lesson', 'LessonProgress'
    ]
    if 'phase' in frame.columns:
        wanted.append('phase')
    frame = frame[wanted]

    # Order the rows, then drop whatever `filters` excludes.
    frame = loader.sort_data_by(frame, ["DateTime", "LessonProgress"])
    frame = loader.filter(filters, df=frame)

    # NOTE(review): saving is assumed to happen only together with the
    # phase finding, i.e. when the data was not quick-loaded -- confirm.
    if not loader.quick_loaded:
        frame = PhaseFinder().find_gynzy_phases_with_lesson_info(frame, "")
        loader.quick_save(frame)

    return frame