def __init__(self):
    """Configure the 'oaaogt_features' table.

    Declares twelve VARCHAR ``*_pl`` columns (sixth, seventh and eighth
    grade), a pre-processing chain holding a CategoricalFeatureProcessor
    over ``oaaogt_features_config.categorical_columns`` and a
    post-processing chain holding an ImputeNullProcessor configured with
    ``ImputeBy.MEAN``, then hands everything to the parent constructor
    with ``high_school_features=False``.
    """
    pl_column_names = (
        'sixth_read_pl',
        'sixth_math_pl',
        'sixth_write_pl',
        'sixth_ctz_pl',
        'sixth_science_pl',
        'seventh_read_pl',
        'seventh_math_pl',
        'seventh_write_pl',
        'eighth_read_pl',
        'eighth_math_pl',
        'eighth_science_pl',
        'eighth_socstudies_pl',
    )
    # Every column in this table shares the same VARCHAR type.
    varchar_columns = [
        sql.Column(col_name, VARCHAR) for col_name in pl_column_names
    ]

    # NOTE: OAAOGTProcessor() is deliberately not part of this chain;
    # the original note says adding it reduces testing perf.
    categorical_step = CategoricalFeatureProcessor(
        column_list=oaaogt_features_config.categorical_columns)
    pre_chain = CompositeFeatureProcessor([categorical_step])

    mean_imputer = ImputeNullProcessor(fill_unspecified=ImputeBy.MEAN)
    post_chain = CompositeFeatureProcessor([mean_imputer])

    super(OAAOGTFeatures, self).__init__(
        table_name='oaaogt_features',
        feature_cols=varchar_columns,
        categorical_cols=[],
        post_features_processor=post_chain,
        pre_features_processor=pre_chain,
        high_school_features=False,
    )
def get_default_feature_processors(train_stats=None):
    """Build the default impute-then-standardize processor chain.

    Args:
        train_stats: optional indexable holding the ``train_stats`` value
            for the ImputeNullProcessor at index 0 and for the
            StandardizeProcessor at index 1. When ``None``, both
            processors receive ``train_stats=None``.

    Returns:
        A CompositeFeatureProcessor wrapping the two processors, in that
        order.
    """
    if train_stats is None:
        train_stats = [None] * 2

    # Constructed in the same order they run in the composite.
    imputer = ImputeNullProcessor(
        train_stats=train_stats[0],
        fill_unspecified=ImputeBy.MEAN,
        col_flag_set=False,
    )
    standardizer = StandardizeProcessor(train_stats=train_stats[1])

    return CompositeFeatureProcessor([imputer, standardizer])
def __init__(self):
    """Configure the absence-description feature table.

    Uses the result of ``self.get_joined_absences()`` as the data table
    and its ``absence_desc`` column as the blocking column. Nulls are
    handled by an ImputeNullProcessor with ``fill_unspecified=0`` and
    ``col_flag_set=False``.
    """
    absences = self.get_joined_absences()

    # Table name is derived from the class name via inflection.underscore.
    derived_table_name = inflection.underscore(AbsenceDescFeatures.__name__)

    zero_filler = ImputeNullProcessor(fill_unspecified=0, col_flag_set=False)
    post_chain = CompositeFeatureProcessor([zero_filler])

    super(AbsenceDescFeatures, self).__init__(
        table_name=derived_table_name,
        categorical_cols=[],
        post_features_processor=post_chain,
        data_table=absences,
        blocking_col=absences.c.absence_desc,
    )
def __init__(self):
    """Configure the discipline-incident-rate feature table.

    Declares two FLOAT columns and a post-processing chain whose
    ImputeNullProcessor takes its per-column values from
    ``config.fill_values``.
    """
    rate_column_names = (
        'discipline_incident_rate',
        'discipline_incident_rate_perc',
    )
    float_columns = [
        sql.Column(col_name, FLOAT) for col_name in rate_column_names
    ]

    # Table name is derived from the class name via inflection.underscore.
    derived_table_name = inflection.underscore(
        DisciplineIncidentRateFeatures.__name__)

    null_filler = ImputeNullProcessor(col_val_dict=config.fill_values)
    post_chain = CompositeFeatureProcessor([null_filler])

    super(DisciplineIncidentRateFeatures, self).__init__(
        table_name=derived_table_name,
        feature_cols=float_columns,
        categorical_cols=[],
        post_features_processor=post_chain,
    )
def __init__(self):
    """Configure the absence feature table.

    Declares three FLOAT rate columns and a post-processing chain whose
    ImputeNullProcessor takes its per-column values from
    ``absence_features_config.fill_values``.
    """
    rate_column_names = (
        'absence_rate',
        'unexcused_absence_rate',
        'absence_rate_perc',
    )
    float_columns = [
        sql.Column(col_name, FLOAT) for col_name in rate_column_names
    ]

    # converts AbsenceFeatures to 'absence_features'
    derived_table_name = inflection.underscore(AbsenceFeatures.__name__)

    null_filler = ImputeNullProcessor(
        col_val_dict=absence_features_config.fill_values)
    post_chain = CompositeFeatureProcessor([null_filler])

    super(AbsenceFeatures, self).__init__(
        table_name=derived_table_name,
        feature_cols=float_columns,
        categorical_cols=[],
        post_features_processor=post_chain,
    )
def __init__(self):
    """Configure the 'middle_school_features' table.

    Declares an INT grade column and a FLOAT average-GPA column. The
    pre-processor is a PivotProcessor indexed by 'student_lookup' that
    spreads 'pivot_ms_avg_gpa' values across 'pivot_ms_grade' columns;
    the post-processor is an ImputeNullProcessor configured with
    ``ImputeBy.MEAN``. Passes ``high_school_features=False`` to the
    parent constructor.
    """
    grade_column = sql.Column('pivot_ms_grade', INT)
    gpa_column = sql.Column('pivot_ms_avg_gpa', FLOAT)

    mean_imputer = ImputeNullProcessor(fill_unspecified=ImputeBy.MEAN)
    post_chain = CompositeFeatureProcessor([mean_imputer])

    pivot_step = PivotProcessor(
        index='student_lookup',
        columns='pivot_ms_grade',
        values='pivot_ms_avg_gpa',
    )

    super(MiddleSchoolFeatures, self).__init__(
        table_name='middle_school_features',
        feature_cols=[grade_column, gpa_column],
        categorical_cols=[],
        post_features_processor=post_chain,
        pre_features_processor=pivot_step,
        high_school_features=False,
    )
def __init__(self):
    """Configure the intervention feature table.

    Reads from ``db_tables.clean_intervention_table``, blocks on its
    ``inv_group`` column and indexes by student_lookup, school_year and
    grade. The school_year index is an INT cast of a four-character
    substring of the table's ``school_year`` column.
    """
    interventions = db_tables.clean_intervention_table
    raw_year = interventions.c.school_year

    # Four characters starting at (length - 3); presumably the trailing
    # four-digit year under 1-based SQL substr -- confirm against the data.
    year_text = db_func.substr(raw_year, db_func.length(raw_year) - 3, 4)
    year_as_int = sql.cast(year_text, sql.INT).label('school_year')

    index_columns = {
        'student_lookup': interventions.c.student_lookup,
        'school_year': year_as_int,
        'grade': interventions.c.grade,
    }

    zero_filler = ImputeNullProcessor(fill_unspecified=0)
    post_chain = CompositeFeatureProcessor([zero_filler])

    super(InvFeatures, self).__init__(
        table_name=inflection.underscore(InvFeatures.__name__),
        categorical_cols=inv_features_config.categorical_columns,
        post_features_processor=post_chain,
        data_table=interventions,
        blocking_col=interventions.c.inv_group,
        index_cols_dict=index_columns,
    )
def __init__(self):
    """Configure the 'academic_features' table.

    Declares one INT column and seven FLOAT columns (GPA and percentile
    values). Nulls are handled by an ImputeNullProcessor that receives
    ``academic_features_config.fill_values`` and
    ``fill_unspecified=False``.
    """
    column_specs = (
        ('num_classes', INT),
        ('hs_avg_gpa', FLOAT),
        ('last_year_gpa', FLOAT),
        ('pre_2_year_gpa', FLOAT),
        ('pre_3_year_gpa', FLOAT),
        ('overall_percentile', FLOAT),
        ('district_percentile', FLOAT),
        ('school_percentile', FLOAT),
    )
    academic_columns = [
        sql.Column(col_name, col_type) for col_name, col_type in column_specs
    ]

    null_filler = ImputeNullProcessor(
        col_val_dict=academic_features_config.fill_values,
        fill_unspecified=False,
    )
    post_chain = CompositeFeatureProcessor([null_filler])

    super(AcademicFeatures, self).__init__(
        table_name='academic_features',
        feature_cols=academic_columns,
        categorical_cols=[],
        post_features_processor=post_chain,
    )
def __init__(self):
    """Configure the 'snapshot_features' table.

    Declares eighteen columns of mixed VARCHAR / INT / FLOAT types and a
    three-stage post-processing chain, all driven by
    ``snapshot_features_config``: ReplaceNullishProcessor, then
    ImputeNullProcessor, then CategoricalFeatureProcessor.
    """
    column_specs = (
        ('gender', VARCHAR),
        ('ethnicity', VARCHAR),
        ('school_name', VARCHAR),
        ('district', VARCHAR),
        ('disability', VARCHAR),
        ('disadvantagement', VARCHAR),
        ('economic_disadvantagement', INT),
        ('academic_disadvantagement', INT),
        ('limited_english', VARCHAR),
        ('discipline_incidents', INT),
        ('pre_1_year_discipline_incidents', INT),
        ('pre_2_year_discipline_incidents', INT),
        ('days_absent', FLOAT),
        ('pre_1_year_days_absent', FLOAT),
        ('pre_2_year_days_absent', FLOAT),
        ('age', INT),
        ('num_transfers', INT),
        ('cumul_discipline_incidents', INT),
    )
    snapshot_columns = [
        sql.Column(col_name, col_type) for col_name, col_type in column_specs
    ]

    # Stages are built in the order they appear in the composite.
    nullish_replacer = ReplaceNullishProcessor(
        column_list=snapshot_features_config.replace_nullish_columns)
    null_filler = ImputeNullProcessor(
        col_val_dict=snapshot_features_config.fill_values,
        col_flag_set=snapshot_features_config.impute_flag_columns)
    categorical_step = CategoricalFeatureProcessor(
        column_list=snapshot_features_config.categorical_columns)
    post_chain = CompositeFeatureProcessor(
        [nullish_replacer, null_filler, categorical_step])

    super(SnapshotFeatures, self).__init__(
        table_name='snapshot_features',
        feature_cols=snapshot_columns,
        categorical_cols=snapshot_features_config.categorical_columns,
        post_features_processor=post_chain,
    )