Example no. 1
from src import definitions
from src.utils.read_utils import read_yaml


def get_students_to_be_ignored():
    """
    Read the student ignore list from the feature config.

    @return: List of student ids to be ignored.
    """
    config = read_yaml(definitions.FEATURE_CONFIG_FILE_PATH)
    return config['students']['student_ignore_list']
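
A hedged sketch of the feature-config shape this function assumes; the nesting mirrors the accessors above, and the student ids are placeholders:

# Shape implied by config['students']['student_ignore_list'] (ids illustrative).
config = {
    'students': {
        'student_ignore_list': [2, 7, 19],
    },
}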
Example no. 2
import itertools

from src import definitions
from src.utils import read_utils


def get_hyper_parameter_list_for_grid_search(
        experiment="multitask_learner_auto_encoder"):
    experiment_config = read_utils.read_yaml(
        definitions.GRID_SEARCH_CONFIG_FILE_PATH)[experiment]

    # Each config entry maps a hyper-parameter name to its candidate values.
    hyper_parameter_list = []
    params = experiment_config.keys()

    for param in params:
        hyper_parameter_list.append(experiment_config[param])

    # Cartesian product over the value lists yields every grid configuration.
    hyper_parameters_list = list(itertools.product(*hyper_parameter_list))
    final_hyper_parameters_list = []

    # Re-attach parameter names so each configuration becomes a dict.
    for hyper_parameters in hyper_parameters_list:
        hyper_parameters_dict = {}
        for idx, param in enumerate(params):
            hyper_parameters_dict[param] = hyper_parameters[idx]

        final_hyper_parameters_list.append(hyper_parameters_dict)

    return final_hyper_parameters_list
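
A minimal usage sketch of the expansion above, with an illustrative two-parameter config; the parameter names and values are made up rather than taken from the project's YAML:

import itertools

# Illustrative config: each key maps one hyper-parameter to its candidate values.
experiment_config = {'learning_rate': [0.01, 0.001], 'hidden_size': [64, 128]}

params = list(experiment_config.keys())
grid = [dict(zip(params, values))
        for values in itertools.product(*experiment_config.values())]
# grid contains all 4 combinations, starting with
# {'learning_rate': 0.01, 'hidden_size': 64}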
Example no. 3
from src import definitions
from src.bin import validations
from src.data_manager import splitter
from src.data_manager import helper as data_manager_helper
from src.data_processing import normalizer
from src.utils import read_utils
from src.utils import student_utils
from src.utils import set_utils
from src.utils import data_conversion_utils as conversions
from src.data_processing import covariates

VAR_BINNED_DATA_CONFIG = read_utils.read_yaml(
    definitions.DATA_MANAGER_CONFIG_FILE_PATH)[
        definitions.VAR_BINNED_DATA_MANAGER_ROOT]
ADJUST_LABELS_WRT_MEDIAN = VAR_BINNED_DATA_CONFIG['adjust_labels_wrt_median']
FLATTEN_SEQUENCE_TO_COLS = VAR_BINNED_DATA_CONFIG['flatten_sequence_to_cols']

DEFAULT_STUDENT_LIST = VAR_BINNED_DATA_CONFIG[
    definitions.STUDENT_LIST_CONFIG_KEY]
# Keep only configured students for whom binned data actually exists on disk.
available_students = student_utils.get_available_students(
    definitions.BINNED_ON_VAR_FREQ_DATA_PATH)
DEFAULT_STUDENT_LIST = list(
    set(DEFAULT_STUDENT_LIST).intersection(set(available_students)))

FEATURE_LIST = VAR_BINNED_DATA_CONFIG[definitions.FEATURE_LIST_CONFIG_KEY]
LABEL_LIST = VAR_BINNED_DATA_CONFIG[definitions.LABEL_LIST_CONFIG_KEY]
COVARIATE_LIST = VAR_BINNED_DATA_CONFIG[definitions.COVARIATE_LIST_CONFIG_KEY]
NORMALIZE_STRAT = VAR_BINNED_DATA_CONFIG['normalize_strategy']

if VAR_BINNED_DATA_CONFIG['process_covariates_as_regular_features']:
    FEATURE_LIST = FEATURE_LIST + COVARIATE_LIST
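
For orientation, a hedged reconstruction of the config block this snippet reads. The four literal keys are grounded in the code above; the list keys are accessed through constants in src.definitions, so their exact names, like every value shown, are assumptions:

# Assumed shape of the var-binned data-manager config (values are placeholders).
var_binned_data_config = {
    'student_list': [1, 4, 7],          # via definitions.STUDENT_LIST_CONFIG_KEY (name assumed)
    'feature_list': ['sleep'],          # via definitions.FEATURE_LIST_CONFIG_KEY (name assumed)
    'label_list': ['stress_level'],     # via definitions.LABEL_LIST_CONFIG_KEY (name assumed)
    'covariate_list': ['day_of_week'],  # via definitions.COVARIATE_LIST_CONFIG_KEY (name assumed)
    'normalize_strategy': 'min_max',
    'adjust_labels_wrt_median': True,
    'flatten_sequence_to_cols': False,
    'process_covariates_as_regular_features': False,
}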
Example no. 4
    ROOT_DIR, "../data/surveys_and_covariates")
STUDENT_RAW_DATA_ANALYSIS_ROOT = os.path.join(
    ROOT_DIR, "../data/raw_student_data_information")

# Data Tuple Indices
DATA_TUPLE_LEN = 6
ACTUAL_DATA_IDX = 0
MISSING_FLAGS_IDX = 1
TIME_DELTA_IDX = 2
COVARIATE_DATA_IDX = 3
HISTOGRAM_IDX = 4
LABELS_IDX = -1  # Always last!
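
A minimal sketch of unpacking one data tuple with these indices; the tuple contents are placeholder strings, and only the index constants come from the snippet above:

# Placeholder tuple laid out according to the index constants above.
data_tuple = ('actual', 'missing_flags', 'time_delta',
              'covariates', 'histogram', 'labels')
assert len(data_tuple) == DATA_TUPLE_LEN

actual_data = data_tuple[ACTUAL_DATA_IDX]
covariate_data = data_tuple[COVARIATE_DATA_IDX]
labels = data_tuple[LABELS_IDX]  # -1 keeps labels last even if new fields are added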

# Data Folder Paths - CLUSTER
# Overwrite Global Constants when cluster mode on.
config = read_yaml(FEATURE_CONFIG_FILE_PATH)
if config['cluster_mode']:
    cluster_data_root = config['data_paths']['cluster_data_path']
    MINIMAL_PROCESSED_DATA_PATH = pathlib.Path(
        os.path.join(cluster_data_root, "student_life_minimal_processed_data"))
    BINNED_ON_VAR_FREQ_DATA_PATH = pathlib.Path(
        os.path.join(cluster_data_root, "student_life_var_binned_data"))
    SURVEYS_AND_COVARIATES_DATA_PATH = pathlib.Path(
        os.path.join(cluster_data_root, "surveys_and_covariates"))

# Labels

ADJUST_WRT_MEDIAN = read_yaml(DATA_MANAGER_CONFIG_FILE_PATH)[
    'student_life_var_binned_data']['adjust_labels_wrt_median']

if ADJUST_WRT_MEDIAN:
"""
Script to generate binned aggregates based on the configuration per feature.
"""
import os
import pandas as pd

from src.definitions import MINIMAL_PROCESSED_DATA_PATH, \
    FEATURE_CONFIG_FILE_PATH, STUDENT_FOLDER_NAME_PREFIX, BINNED_ON_VAR_FREQ_DATA_PATH
from src.utils.read_utils import read_yaml
from src.utils.write_utils import df_to_csv
from src.utils import student_utils
from src.data_processing import helper

# Reading Configs.
FEATURE_CONFIG = read_yaml(FEATURE_CONFIG_FILE_PATH)['features']
AVAILABLE_FEATURE = FEATURE_CONFIG.keys()
COVARIATES = read_yaml(FEATURE_CONFIG_FILE_PATH)['covariates']
STUDENT_CONFIG = read_yaml(FEATURE_CONFIG_FILE_PATH)['students']
AVAILABLE_STUDENTS = student_utils.get_available_students(
    MINIMAL_PROCESSED_DATA_PATH)
students = STUDENT_CONFIG['student_list']

if students:
    AVAILABLE_STUDENTS = list(
        set(students).intersection(set(AVAILABLE_STUDENTS)))

############## Main Loop To Process Data ##################

for student_id in AVAILABLE_STUDENTS:

    student_data = []
Example no. 6
import itertools

from src import definitions
from sklearn import metrics
from src.bin import tensorify
from src.bin import plotting
from src.data_manager import cross_val
from src.grid_search import helper
from src.utils import data_conversion_utils as conversions
from src.utils import read_utils
from src.utils import write_utils
from statistics import mean as list_mean
from src.bin import statistics

F_SCORE_INDEX = 2

TRAINING_DATA_FILE_NAME = read_utils.read_yaml(
    definitions.GRID_SEARCH_CONFIG_FILE_PATH)['data_file_name']


def get_hyper_parameter_list_for_grid_search(
        experiment="multitask_learner_auto_encoder"):
    experiment_config = read_utils.read_yaml(
        definitions.GRID_SEARCH_CONFIG_FILE_PATH)[experiment]
    hyper_parameter_list = []
    params = experiment_config.keys()

    for param in params:
        hyper_parameter_list.append(experiment_config[param])

    hyper_parameters_list = list(itertools.product(*hyper_parameter_list))
    final_hyper_parameters_list = []
Example no. 7
import pandas as pd
import numpy as np

from src import definitions
from src.utils import read_utils
from src.bin import validations
from src.data_processing import aggregates
from src.data_processing import covariates as covariate_processor
from src.data_processing import imputation

FEATURE_IMPUTATION_STRATEGY = read_utils.read_yaml(definitions.FEATURE_CONFIG_FILE_PATH)[
    'feature_imputation_strategy']

COVARIATE_FUNC_MAPPING = {
    'day_of_week': covariate_processor.day_of_week,
    'epoch_of_day': covariate_processor.epoch_of_day,
    'time_since_last_label': covariate_processor.time_since_last_label_min,
    'time_to_next_label': covariate_processor.time_to_next_label_min,
    'gender': covariate_processor.evaluate_gender,
    'previous_stress_label': covariate_processor.previous_stress_label,
    'time_to_next_deadline': covariate_processor.time_to_next_deadline
}
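
COVARIATE_FUNC_MAPPING is a dispatch table: a covariate name taken from the config selects its processing function. A minimal sketch of the lookup; the covariate name is illustrative, and the call is left commented because the processors' signatures are not shown here:

# Illustrative dispatch through the mapping above.
covariate_name = 'day_of_week'  # e.g. read from the feature config
covariate_func = COVARIATE_FUNC_MAPPING[covariate_name]
# covariate_func(...)  # invoked with whatever inputs the covariate processor expects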

AGGREGATE_FUNC_MAPPING = {
    'mode': aggregates.mode,
    'inferred_feature': aggregates.inferred_feature,
    'robust_sum': aggregates.robust_sum,
    'time': aggregates.time_group,
    "0": aggregates.count_0,
    "1": aggregates.count_1,
    "2": aggregates.count_2,
Example no. 8
"""
Script for training GRU-D on the StudentLife dataset.
"""
import torch

from src.bin import plotting
from src.bin import scoring
from src.experiments.grud import helper
from src.bin import trainer
from src.bin import tensorify
from src import definitions
from src.utils import read_utils as reader
from src.models.grud import GRUD

GRU_D_CONFIG = reader.read_yaml(definitions.MODEL_CONFIG_FILE_PATH)['gru_d']
CLUSTER_MODE = reader.read_yaml(
    definitions.FEATURE_CONFIG_FILE_PATH)['cluster_mode']


def initialize_gru(num_features,
                   hidden_size,
                   output_size,
                   x_mean,
                   num_layers,
                   learning_rate,
                   dropout_type='mloss'):
    ######################## Initialization ########################
    # Note: GRUD accepts data with rows as features and columns as time steps!
    model = GRUD(input_size=num_features,
                 hidden_size=hidden_size,
                 output_size=output_size,