예제 #1
0
    def __init__(self):
        """Set up the ``oaaogt_features`` feature table.

        Declares twelve VARCHAR ``*_pl`` columns covering sixth-, seventh-
        and eighth-grade subjects, then hands them to the parent
        constructor together with a pre-features processor chain (a
        CategoricalFeatureProcessor over the columns listed in
        ``oaaogt_features_config.categorical_columns``) and a post-features
        chain (an ImputeNullProcessor configured with ``ImputeBy.MEAN``).
        """
        column_names = (
            'sixth_read_pl',
            'sixth_math_pl',
            'sixth_write_pl',
            'sixth_ctz_pl',
            'sixth_science_pl',
            'seventh_read_pl',
            'seventh_math_pl',
            'seventh_write_pl',
            'eighth_read_pl',
            'eighth_math_pl',
            'eighth_science_pl',
            'eighth_socstudies_pl',
        )
        # Every column in this table shares the same VARCHAR type.
        feature_cols = [sql.Column(name, VARCHAR) for name in column_names]

        # OAAOGTProcessor() is deliberately left out of this chain: the
        # original author noted that adding it reduces testing performance.
        categorical_step = CategoricalFeatureProcessor(
            column_list=oaaogt_features_config.categorical_columns)
        pre_features_processor = CompositeFeatureProcessor([categorical_step])

        mean_imputer = ImputeNullProcessor(fill_unspecified=ImputeBy.MEAN)
        post_features_processor = CompositeFeatureProcessor([mean_imputer])

        super(OAAOGTFeatures, self).__init__(
            table_name='oaaogt_features',
            feature_cols=feature_cols,
            categorical_cols=[],
            post_features_processor=post_features_processor,
            pre_features_processor=pre_features_processor,
            high_school_features=False,
        )
예제 #2
0
def get_default_feature_processors(train_stats=None):
    """Build the default impute-then-standardize processor chain.

    Args:
        train_stats: Optional indexable; item 0 is passed to
            ImputeNullProcessor and item 1 to StandardizeProcessor as their
            ``train_stats`` argument. When None, both receive None.

    Returns:
        A CompositeFeatureProcessor holding the ImputeNullProcessor followed
        by the StandardizeProcessor.
    """
    if train_stats is None:
        # No training statistics supplied: give each processor a None slot.
        train_stats = [None] * 2

    imputer = ImputeNullProcessor(
        train_stats=train_stats[0],
        fill_unspecified=ImputeBy.MEAN,
        col_flag_set=False,
    )
    standardizer = StandardizeProcessor(train_stats=train_stats[1])
    return CompositeFeatureProcessor([imputer, standardizer])
    def __init__(self):
        """Set up the absence-description feature table.

        The table name is the underscored form of the class name. The data
        table comes from ``self.get_joined_absences()`` and its
        ``absence_desc`` column is passed as the blocking column. The
        post-features chain is a single ImputeNullProcessor configured with
        ``fill_unspecified=0`` and ``col_flag_set=False``.
        """
        absences = self.get_joined_absences()

        super(AbsenceDescFeatures, self).__init__(
            table_name=inflection.underscore(AbsenceDescFeatures.__name__),
            categorical_cols=[],
            post_features_processor=CompositeFeatureProcessor([
                ImputeNullProcessor(fill_unspecified=0, col_flag_set=False),
            ]),
            data_table=absences,
            blocking_col=absences.c.absence_desc,
        )
예제 #4
0
 def __init__(self):
     """Set up the discipline-incident-rate feature table.

     Declares two FLOAT columns and passes them to the parent constructor.
     The table name is the underscored form of the class name, and the
     post-features chain is one ImputeNullProcessor driven by
     ``config.fill_values``.
     """
     rate_column_names = (
         'discipline_incident_rate',
         'discipline_incident_rate_perc',
     )
     feature_cols = [sql.Column(name, FLOAT) for name in rate_column_names]

     super(DisciplineIncidentRateFeatures, self).__init__(
         table_name=inflection.underscore(
             DisciplineIncidentRateFeatures.__name__),
         feature_cols=feature_cols,
         categorical_cols=[],
         post_features_processor=CompositeFeatureProcessor([
             ImputeNullProcessor(col_val_dict=config.fill_values),
         ]),
     )
 def __init__(self):
     """Set up the absence-rate feature table.

     Declares three FLOAT rate columns and passes them to the parent
     constructor. The post-features chain is one ImputeNullProcessor driven
     by ``absence_features_config.fill_values``.
     """
     rate_column_names = (
         'absence_rate',
         'unexcused_absence_rate',
         'absence_rate_perc',
     )
     feature_cols = [sql.Column(name, FLOAT) for name in rate_column_names]

     super(AbsenceFeatures, self).__init__(
         # underscore() converts AbsenceFeatures to 'absence_features'
         table_name=inflection.underscore(AbsenceFeatures.__name__),
         feature_cols=feature_cols,
         categorical_cols=[],
         post_features_processor=CompositeFeatureProcessor([
             ImputeNullProcessor(
                 col_val_dict=absence_features_config.fill_values),
         ]),
     )
    def __init__(self):
        """Set up the ``middle_school_features`` feature table.

        Declares an INT grade column and a FLOAT average-GPA column. The
        pre-features processor is a PivotProcessor indexed by
        ``student_lookup`` that pivots on the grade column and takes its
        values from the GPA column. The post-features chain is an
        ImputeNullProcessor configured with ``ImputeBy.MEAN``.
        """
        grade_col = sql.Column('pivot_ms_grade', INT)
        gpa_col = sql.Column('pivot_ms_avg_gpa', FLOAT)

        mean_imputer = ImputeNullProcessor(fill_unspecified=ImputeBy.MEAN)
        post_processor = CompositeFeatureProcessor([mean_imputer])

        super(MiddleSchoolFeatures, self).__init__(
            table_name='middle_school_features',
            feature_cols=[grade_col, gpa_col],
            categorical_cols=[],
            post_features_processor=post_processor,
            pre_features_processor=PivotProcessor(
                index='student_lookup',
                columns='pivot_ms_grade',
                values='pivot_ms_avg_gpa',
            ),
            high_school_features=False,
        )
    def __init__(self):
        """Set up the intervention feature table.

        Uses ``db_tables.clean_intervention_table`` as the data table with
        ``inv_group`` as the blocking column. The index columns are
        ``student_lookup``, ``grade`` and a ``school_year`` expression cast
        to INT. The post-features chain is one ImputeNullProcessor with
        ``fill_unspecified=0``.
        """
        inv_table = db_tables.clean_intervention_table

        # Take 4 characters starting at position length - 3 of school_year.
        # Assuming SQL's 1-based substr, that is the last four characters.
        year_text = inv_table.c.school_year
        year_suffix = db_func.substr(
            year_text, db_func.length(year_text) - 3, 4)
        year_as_int = sql.cast(year_suffix, sql.INT).label('school_year')

        index_cols_dict = {
            'student_lookup': inv_table.c.student_lookup,
            'school_year': year_as_int,
            'grade': inv_table.c.grade,
        }

        zero_fill_chain = CompositeFeatureProcessor(
            [ImputeNullProcessor(fill_unspecified=0)])

        super(InvFeatures, self).__init__(
            table_name=inflection.underscore(InvFeatures.__name__),
            categorical_cols=inv_features_config.categorical_columns,
            post_features_processor=zero_fill_chain,
            data_table=inv_table,
            blocking_col=inv_table.c.inv_group,
            index_cols_dict=index_cols_dict,
        )
 def __init__(self):
     """Set up the ``academic_features`` feature table.

     Declares one INT column (``num_classes``) followed by seven FLOAT GPA
     and percentile columns. The post-features chain is one
     ImputeNullProcessor driven by ``academic_features_config.fill_values``.
     """
     float_column_names = (
         'hs_avg_gpa',
         'last_year_gpa',
         'pre_2_year_gpa',
         'pre_3_year_gpa',
         'overall_percentile',
         'district_percentile',
         'school_percentile',
     )
     # num_classes is the only integer column and stays first in the list.
     cols = [sql.Column('num_classes', INT)]
     cols.extend(sql.Column(name, FLOAT) for name in float_column_names)

     # NOTE(review): fill_unspecified=False presumably disables filling of
     # columns missing from fill_values -- confirm against
     # ImputeNullProcessor.
     null_imputer = ImputeNullProcessor(
         col_val_dict=academic_features_config.fill_values,
         fill_unspecified=False,
     )

     super(AcademicFeatures, self).__init__(
         table_name='academic_features',
         feature_cols=cols,
         categorical_cols=[],
         post_features_processor=CompositeFeatureProcessor([null_imputer]),
     )
예제 #9
0
    def __init__(self):
        """Set up the ``snapshot_features`` feature table.

        Declares eighteen columns of mixed VARCHAR/INT/FLOAT types. The
        post-features chain has three steps, in order:
        ReplaceNullishProcessor, ImputeNullProcessor and
        CategoricalFeatureProcessor, each configured from
        ``snapshot_features_config``.
        """
        # (column name, column type) pairs, in table order.
        column_specs = (
            ('gender', VARCHAR),
            ('ethnicity', VARCHAR),
            ('school_name', VARCHAR),
            ('district', VARCHAR),
            ('disability', VARCHAR),
            ('disadvantagement', VARCHAR),
            ('economic_disadvantagement', INT),
            ('academic_disadvantagement', INT),
            ('limited_english', VARCHAR),
            ('discipline_incidents', INT),
            ('pre_1_year_discipline_incidents', INT),
            ('pre_2_year_discipline_incidents', INT),
            ('days_absent', FLOAT),
            ('pre_1_year_days_absent', FLOAT),
            ('pre_2_year_days_absent', FLOAT),
            ('age', INT),
            ('num_transfers', INT),
            ('cumul_discipline_incidents', INT),
        )
        cols = [sql.Column(name, col_type) for name, col_type in column_specs]

        replace_nullish = ReplaceNullishProcessor(
            column_list=snapshot_features_config.replace_nullish_columns)
        impute_nulls = ImputeNullProcessor(
            col_val_dict=snapshot_features_config.fill_values,
            col_flag_set=snapshot_features_config.impute_flag_columns)
        encode_categoricals = CategoricalFeatureProcessor(
            column_list=snapshot_features_config.categorical_columns)

        super(SnapshotFeatures, self).__init__(
            table_name='snapshot_features',
            feature_cols=cols,
            categorical_cols=snapshot_features_config.categorical_columns,
            post_features_processor=CompositeFeatureProcessor(
                [replace_nullish, impute_nulls, encode_categoricals]),
        )