from src import definitions
from src.utils.read_utils import read_yaml


def get_students_to_be_ignored():
    """
    @return: The student ignore list read from the feature config.
    """
    config = read_yaml(definitions.FEATURE_CONFIG_FILE_PATH)
    return config['students']['student_ignore_list']
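# A hedged usage sketch for get_students_to_be_ignored(). The YAML below is
# an assumed shape of the 'students' section in the feature config, with
# illustrative student ids:
#
#   students:
#     student_list: [1, 2, 4, 7]
#     student_ignore_list: [2, 7]
#
#   ignored = get_students_to_be_ignored()  # -> [2, 7]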
import itertools

from src import definitions
from src.utils import read_utils


def get_hyper_parameter_list_for_grid_search(
        experiment="multitask_learner_auto_encoder"):
    """
    Expand the grid-search config of `experiment` into a list of dicts,
    one per hyper-parameter combination.
    """
    experiment_config = read_utils.read_yaml(
        definitions.GRID_SEARCH_CONFIG_FILE_PATH)[experiment]

    # One list of candidate values per hyper parameter, in key order.
    params = list(experiment_config.keys())
    hyper_parameter_list = [experiment_config[param] for param in params]

    # Cross product of the value lists -> one tuple per grid point.
    hyper_parameters_list = list(itertools.product(*hyper_parameter_list))

    final_hyper_parameters_list = []
    for hyper_parameters in hyper_parameters_list:
        final_hyper_parameters_list.append(dict(zip(params, hyper_parameters)))

    return final_hyper_parameters_list
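# A hedged example of the expansion this helper performs. Assuming a
# grid-search config section like the following (parameter names and values
# are illustrative, not taken from the shipped config):
#
#   multitask_learner_auto_encoder:
#     learning_rate: [0.001, 0.0001]
#     hidden_size: [64, 128]
#
# get_hyper_parameter_list_for_grid_search() returns the full cross product,
# one dict per run:
#
#   [{'learning_rate': 0.001,  'hidden_size': 64},
#    {'learning_rate': 0.001,  'hidden_size': 128},
#    {'learning_rate': 0.0001, 'hidden_size': 64},
#    {'learning_rate': 0.0001, 'hidden_size': 128}]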
from src import definitions
from src.bin import validations as validations
from src.data_manager import splitter
from src.data_manager import helper as data_manager_helper
from src.data_processing import normalizer
from src.utils import read_utils
from src.utils import student_utils
from src.utils import set_utils
from src.utils import data_conversion_utils as conversions
from src.data_processing import covariates

VAR_BINNED_DATA_CONFIG = read_utils.read_yaml(
    definitions.DATA_MANAGER_CONFIG_FILE_PATH)[
    definitions.VAR_BINNED_DATA_MANAGER_ROOT]
ADJUST_LABELS_WRT_MEDIAN = VAR_BINNED_DATA_CONFIG['adjust_labels_wrt_median']
FLATTEN_SEQUENCE_TO_COLS = VAR_BINNED_DATA_CONFIG['flatten_sequence_to_cols']

# Restrict the configured student list to students that actually have
# binned data on disk.
DEFAULT_STUDENT_LIST = VAR_BINNED_DATA_CONFIG[
    definitions.STUDENT_LIST_CONFIG_KEY]
available_students = student_utils.get_available_students(
    definitions.BINNED_ON_VAR_FREQ_DATA_PATH)
DEFAULT_STUDENT_LIST = list(
    set(DEFAULT_STUDENT_LIST).intersection(set(available_students)))

FEATURE_LIST = VAR_BINNED_DATA_CONFIG[definitions.FEATURE_LIST_CONFIG_KEY]
LABEL_LIST = VAR_BINNED_DATA_CONFIG[definitions.LABEL_LIST_CONFIG_KEY]
COVARIATE_LIST = VAR_BINNED_DATA_CONFIG[definitions.COVARIATE_LIST_CONFIG_KEY]
NORMALIZE_STRAT = VAR_BINNED_DATA_CONFIG['normalize_strategy']

if VAR_BINNED_DATA_CONFIG['process_covariates_as_regular_features']:
    FEATURE_LIST = FEATURE_LIST + COVARIATE_LIST
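# A hedged sketch of the data-manager config section this module reads. The
# section name 'student_life_var_binned_data' appears elsewhere in the repo;
# the key names hidden behind definitions.*_CONFIG_KEY and all list values
# are assumptions for illustration only:
#
#   student_life_var_binned_data:
#     adjust_labels_wrt_median: true
#     flatten_sequence_to_cols: false
#     normalize_strategy: min_max
#     process_covariates_as_regular_features: false
#     student_list: [1, 2, 4]
#     feature_list: [activity, sleep]
#     label_list: [stress_level]
#     covariate_list: [day_of_week, gender]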
ROOT_DIR, "../data/surveys_and_covariates") STUDENT_RAW_DATA_ANALYSIS_ROOT = os.path.join( ROOT_DIR, "../data/raw_student_data_information") # Data Tuple Indices DATA_TUPLE_LEN = 6 ACTUAL_DATA_IDX = 0 MISSING_FLAGS_IDX = 1 TIME_DELTA_IDX = 2 COVARIATE_DATA_IDX = 3 HISTOGRAM_IDX = 4 LABELS_IDX = -1 # Always last! # Data Folder Paths - CLUSTER # Overwrite Global Constants when cluster mode on. config = read_yaml(FEATURE_CONFIG_FILE_PATH) if config['cluster_mode']: cluster_data_root = config['data_paths']['cluster_data_path'] MINIMAL_PROCESSED_DATA_PATH = pathlib.Path( os.path.join(cluster_data_root, "student_life_minimal_processed_data")) BINNED_ON_VAR_FREQ_DATA_PATH = pathlib.Path( os.path.join(cluster_data_root, "student_life_var_binned_data")) SURVEYS_AND_COVARIATES_DATA_PATH = pathlib.Path( os.path.join(cluster_data_root, "surveys_and_covariates")) # Labels ADJUST_WRT_MEDIAN = read_yaml(DATA_MANAGER_CONFIG_FILE_PATH)[ 'student_life_var_binned_data']['adjust_labels_wrt_median'] if ADJUST_WRT_MEDIAN:
""" Script to generate binned aggregates based on the configuration per feature. """ import os import pandas as pd from src.definitions import MINIMAL_PROCESSED_DATA_PATH, \ FEATURE_CONFIG_FILE_PATH, STUDENT_FOLDER_NAME_PREFIX, BINNED_ON_VAR_FREQ_DATA_PATH from src.utils.read_utils import read_yaml from src.utils.write_utils import df_to_csv from src.utils import student_utils from src.data_processing import helper # Reading Configs. FEATURE_CONFIG = read_yaml(FEATURE_CONFIG_FILE_PATH)['features'] AVAILABLE_FEATURE = FEATURE_CONFIG.keys() COVARIATES = read_yaml(FEATURE_CONFIG_FILE_PATH)['covariates'] STUDENT_CONFIG = read_yaml(FEATURE_CONFIG_FILE_PATH)['students'] AVAILABLE_STUDENTS = student_utils.get_available_students( MINIMAL_PROCESSED_DATA_PATH) students = read_yaml(FEATURE_CONFIG_FILE_PATH)['students']['student_list'] if students: AVAILABLE_STUDENTS = list( set(students).intersection(set(AVAILABLE_STUDENTS))) ############## Main Loop To Process Data ################## for student_id in AVAILABLE_STUDENTS: student_data = []
import itertools

from src import definitions
from sklearn import metrics
from src.bin import tensorify
from src.bin import plotting
from src.data_manager import cross_val
from src.grid_search import helper
from src.utils import data_conversion_utils as conversions
from src.utils import read_utils
from src.utils import write_utils
from statistics import mean as list_mean
from src.bin import statistics

F_SCORE_INDEX = 2
TRAINING_DATA_FILE_NAME = read_utils.read_yaml(
    definitions.GRID_SEARCH_CONFIG_FILE_PATH)['data_file_name']


def get_hyper_parameter_list_for_grid_search(
        experiment="multitask_learner_auto_encoder"):
    experiment_config = read_utils.read_yaml(
        definitions.GRID_SEARCH_CONFIG_FILE_PATH)[experiment]

    # One list of candidate values per hyper parameter, in key order.
    params = list(experiment_config.keys())
    hyper_parameter_list = [experiment_config[param] for param in params]

    # Cross product of the value lists -> one tuple per grid point.
    hyper_parameters_list = list(itertools.product(*hyper_parameter_list))
    final_hyper_parameters_list = []
import pandas as pd
import numpy as np

from src import definitions
from src.utils import read_utils
from src.bin import validations as validations
from src.data_processing import aggregates
from src.data_processing import covariates as covariate_processor
from src.data_processing import imputation

FEATURE_IMPUTATION_STRATEGY = read_utils.read_yaml(
    definitions.FEATURE_CONFIG_FILE_PATH)['feature_imputation_strategy']

# Map covariate names (as they appear in the feature config) to the
# functions that compute them.
COVARIATE_FUNC_MAPPING = {
    'day_of_week': covariate_processor.day_of_week,
    'epoch_of_day': covariate_processor.epoch_of_day,
    'time_since_last_label': covariate_processor.time_since_last_label_min,
    'time_to_next_label': covariate_processor.time_to_next_label_min,
    'gender': covariate_processor.evaluate_gender,
    'previous_stress_label': covariate_processor.previous_stress_label,
    'time_to_next_deadline': covariate_processor.time_to_next_deadline
}

# Map aggregation-strategy names to aggregation functions.
AGGREGATE_FUNC_MAPPING = {
    'mode': aggregates.mode,
    'inferred_feature': aggregates.inferred_feature,
    'robust_sum': aggregates.robust_sum,
    'time': aggregates.time_group,
    "0": aggregates.count_0,
    "1": aggregates.count_1,
    "2": aggregates.count_2,
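# A hedged usage sketch: resolving an aggregation function by the strategy
# name a feature carries in the config. That the aggregators are applied per
# resampled bin of a pandas Series is an assumption:
#
#   agg_func = AGGREGATE_FUNC_MAPPING['mode']
#   binned = feature_series.resample('60T').apply(agg_func)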
""" Script for training StudentLife on GRU-D """ import torch from src.bin import plotting from src.bin import scoring from src.experiments.grud import helper from src.bin import trainer from src.bin import tensorify from src import definitions from src.utils import read_utils as reader from src.models.grud import GRUD GRU_D_CONFIG = reader.read_yaml(definitions.MODEL_CONFIG_FILE_PATH)['gru_d'] CLUSTER_MODE = reader.read_yaml( definitions.FEATURE_CONFIG_FILE_PATH)['cluster_mode'] def initialize_gru(num_features, hidden_size, output_size, x_mean, num_layers, learning_rate, dropout_type='mloss'): ######################## Initialization ######################## # Note : GRUD accepts data with rows as features and columns as time steps! model = GRUD(input_size=num_features, hidden_size=hidden_size, output_size=output_size,