def _feature_aggregations_space_time_lookback(self, engine):
    """Build the dispatch feature aggregations.

    Args:
        engine: passed through to ``self._lookup_values_conditions``.

    Returns:
        dict mapping a feature-group name to a ``collate.Aggregate``.
    """
    features = {}

    # Dispatch type columns come from the lookup helper
    # (presumably one entry per row of lookup_dispatch_types -- confirm).
    features['DispatchType'] = collate.Aggregate(
        self._lookup_values_conditions(
            engine,
            column_code_name='dispatch_final_type_code',
            lookup_table='lookup_dispatch_types',
            prefix='DispatchType'),
        ['sum', 'avg'])

    # Hard-coded dispatch categories, each cast to a 0/1 indicator in SQL.
    features['DispatchInitiatiationType'] = collate.Aggregate(
        {
            "DispatchInitiatiationType_ci": "(dispatch_category = 'CI')::int",
            "DispatchInitiatiationType_oi": "(dispatch_category = 'OI')::int",
            "DispatchInitiatiationType_al": "(dispatch_category = 'AL')::int",
        },
        ['sum', 'avg'])

    # Hard-coded division ids; only the sum is requested for these.
    features['DispatchDivision'] = collate.Aggregate(
        {
            "DispatchDivision_07": "(division_id = '07')::int",
            "DispatchDivision_21": "(division_id = '21')::int",
            "DispatchDivision_02": "(division_id = '02')::int",
            "DispatchDivision_06": "(division_id = '06')::int",
            "DispatchDivision_11": "(division_id = '11')::int",
            "DispatchDivision_14": "(division_id = '14')::int",
            "DispatchDivision_22": "(division_id = '22')::int",
            "DispatchDivision_28": "(division_id = '28')::int",
            "DispatchDivision_16": "(division_id = '16')::int",
            "DispatchDivision_01": "(division_id = '01')::int",
            "DispatchDivision_27": "(division_id = '27')::int",
            "DispatchDivision_17": "(division_id = '17')::int",
            "DispatchDivision_12": "(division_id = '12')::int",
            "DispatchDivision_26": "(division_id = '26')::int",
        },
        ['sum'])

    return features
def _feature_aggregations_space_time_lookback(self, engine):
    """Build the incident / complaint feature aggregations.

    Args:
        engine: passed through to the ``self._lookup_values_conditions`` and
            ``self._group_category_conditions_str`` helpers.

    Returns:
        dict mapping a feature-group name to a ``collate.Aggregate``; every
        group requests both ``sum`` and ``avg``.
    """
    features = {}

    features['IncidentsOfType'] = collate.Aggregate(
        self._lookup_values_conditions(
            engine,
            column_code_name='grouped_incident_type_code',
            lookup_table='lookup_incident_types',
            prefix='IncidentsOfType'),
        ['sum', 'avg'])

    # Categories are derived from the staging.incidents column by the helper.
    features['IncidentsOfTypeDep'] = collate.Aggregate(
        self._group_category_conditions_str(
            engine,
            column_name='department_defined_policy_type',
            schema='staging',
            table='incidents',
            prefix='IncidentsOfTypeDep'),
        ['sum', 'avg'])

    # fix_condition is forwarded verbatim to the helper.
    features['ComplaintsTypeSource'] = collate.Aggregate(
        self._lookup_values_conditions(
            engine,
            column_code_name='origination_type_code',
            lookup_table='lookup_complaint_origins',
            prefix='ComplaintsTypeSource',
            fix_condition='origination_type_code NOTNULL'),
        ['sum', 'avg'])

    features['AllAllegations'] = collate.Aggregate(
        {"AllAllegations": "number_of_allegations"},
        ['sum', 'avg'])

    return features
def _feature_aggregations_sub(self, engine):
    """Build the arrest-count dispersion aggregations.

    Args:
        engine: unused in this method.

    Returns:
        dict mapping a feature-group name to a ``collate.Aggregate`` over the
        ``count_officer`` column.
    """
    monthly_variance = collate.Aggregate(
        {"ArrestMonthlyVariance": 'count_officer'},
        ['variance'])

    # TODO
    # NOTE(review): the group key is 'ArrestMonthlyCV' while the column is
    # "ArrestMonthlyCOV" -- possibly what the TODO refers to; confirm.
    monthly_cv = collate.Aggregate(
        {"ArrestMonthlyCOV": 'count_officer'},
        ['cv'])

    return {
        'ArrestMonthlyVariance': monthly_variance,
        'ArrestMonthlyCV': monthly_cv,
    }
def _feature_aggregations_sub(self, engine):
    """Build the officer-role change aggregations.

    Args:
        engine: unused in this method.

    Returns:
        dict mapping a feature-group name to a ``collate.Aggregate`` that
        requests the ``variance`` of a count column.
    """
    pay_grade_change = collate.Aggregate(
        {"OfficerRolePayGradeChange": 'count_paygrade'},
        ['variance'])
    police_area_change = collate.Aggregate(
        {"OfficerRolePoliceAreaChange": 'count_policearea'},
        ['variance'])

    return {
        'OfficerRolePayGradeChange': pay_grade_change,
        'OfficerRolePoliceAreaChange': police_area_change,
    }
def test_aggregate_arithmetic():
    """Aggregates combined with ``/`` and ``+`` render as one SQL expression."""
    numerator = collate.Aggregate("x", "sum", {})
    denominator = collate.Aggregate("1", "count", {})
    addend = collate.Aggregate("y", "avg", {})

    combined = numerator / denominator + addend
    first_column = list(combined.get_columns(prefix="prefix_"))[0]

    assert str(first_column) == "((sum(x)*1.0 / count(1)) + avg(y))"
    assert first_column.name == "prefix_x_sum/1_count+y_avg"
def _feature_aggregations_space_time_lookback(self, engine):
    """Build the outside-employment aggregations.

    Args:
        engine: unused in this method.

    Returns:
        dict mapping a feature-group name to a ``collate.Aggregate``; both
        groups request ``sum`` and ``avg``.
    """
    hours = collate.Aggregate(
        {"OutsideEmploymentHours": "hours_on_shift"},
        ['sum', 'avg'])

    # Income is expressed in SQL as hours multiplied by the hourly rate.
    income = collate.Aggregate(
        {"OutsideEmploymentIncome": "hours_on_shift*hourly_rate"},
        ['sum', 'avg'])

    return {
        'OutsideEmploymentHours': hours,
        'OutsideEmploymentIncome': income,
    }
def _feature_aggregations_space_time_lookback(self, engine):
    """Build the officer-transfer aggregations.

    Args:
        engine: unused in this method.

    Returns:
        dict mapping a feature-group name to a ``collate.Aggregate``; both
        groups request ``sum`` and ``avg``.
    """
    bid_transfer = collate.Aggregate(
        {"OfficerRoleBidTransfer": "bid_transfer"},
        ['sum', 'avg'])
    no_bid_no_pay_transfer = collate.Aggregate(
        {"OfficerRoleNoBidNoPayTransfer": "no_pay_no_bid_change_transfer"},
        ['sum', 'avg'])

    return {
        'OfficerRoleBidTransfer': bid_transfer,
        'OfficerRoleNoBidNoPayTransfer': no_bid_no_pay_transfer,
    }
def _feature_aggregations_space_time_lookback(self, engine):
    """Build the shift aggregations.

    Args:
        engine: passed through to ``self._lookup_values_conditions``.

    Returns:
        dict mapping a feature-group name to a ``collate.Aggregate``.
    """
    features = {}

    features['ShiftsOfType'] = collate.Aggregate(
        self._lookup_values_conditions(
            engine,
            column_code_name='shift_type_code',
            lookup_table='lookup_shift_types',
            prefix='ShiftsOfType'),
        ['sum', 'avg'])

    # EPOCH yields seconds; dividing by 3600 expresses shift_length in hours.
    features['HoursPerShift'] = collate.Aggregate(
        {'HoursPerShift': '(EXTRACT( EPOCH from shift_length)/3600)'},
        ['avg', 'sum'])

    return features
def _feature_aggregations(self, engine):
    """Build the EIS intervention and flag aggregations.

    Args:
        engine: passed through to ``self._lookup_values_conditions``.

    Returns:
        dict mapping a feature-group name to a ``collate.Aggregate``; both
        groups request only ``sum``.
    """
    features = {}

    features['EISInterventionsOfType'] = collate.Aggregate(
        self._lookup_values_conditions(
            engine,
            column_code_name='intervention_type',
            lookup_table='lookup_intervention_types',
            prefix='EISInterventionsOfType'),
        ['sum'])

    features['EISFlagsOfType'] = collate.Aggregate(
        self._lookup_values_conditions(
            engine,
            column_code_name='event_type',
            lookup_table='lookup_eis_flag_types',
            prefix='EISFlagsOfType'),
        ['sum'])

    return features
def _feature_aggregations_space_time_lookback(self, engine):
    """Build the incident aggregations broken down by ruling outcome.

    Besides the lookup-driven ``IncidentsByOutcome`` group, three groups
    combine every department-defined policy type with one outcome condition
    (sustained, unsustained, unknown). The outcome conditions are the SQL
    fragments stored in ``AllegationOutcome.<member>.value``.

    Args:
        engine: passed through to ``self._lookup_values_conditions``.

    Returns:
        dict mapping a feature-group name to a ``collate.Aggregate``.
    """
    # (column-name suffix, department_defined_policy_type value). The order
    # is the order of the generated columns, so keep it stable.
    policy_types = (
        ('use_of_force', 'Use Of Force'),
        ('tdd', 'TDD'),
        ('complaint', 'Complaint'),
        ('pursuit', 'Pursuit'),
        ('dof', 'DOF'),
        ('raid_search', 'Raid And Search'),
        ('injury', 'Injury'),
        ('icd', 'ICD'),
        ('nfsi', 'NFSI'),
        ('accident', 'Accident'),
    )

    def conditions_by_policy_type(prefix, outcome_condition):
        """Return ``{prefix_suffix: 0/1 SQL indicator}`` for every policy type."""
        # Plain concatenation (not str.format) so a non-string outcome
        # condition still fails loudly instead of being silently stringified.
        return {
            prefix + '_' + suffix:
                "(department_defined_policy_type = '" + policy_type
                + "' and " + outcome_condition + ")::int"
            for suffix, policy_type in policy_types
        }

    return {
        'IncidentsByOutcome': collate.Aggregate(
            self._lookup_values_conditions(
                engine,
                column_code_name='final_ruling_code',
                lookup_table='lookup_final_rulings',
                prefix='IncidentsByOutcome'),
            ['avg', 'sum']),
        'IncidentsOfTypeOutSustained': collate.Aggregate(
            conditions_by_policy_type(
                'IncidentsOfTypeOutSustained',
                AllegationOutcome.sustained.value),
            ['sum', 'avg']),
        'IncidentsOfTypeUnSustained': collate.Aggregate(
            conditions_by_policy_type(
                'IncidentsOfTypeUnSustained',
                AllegationOutcome.unsustained.value),
            ['sum', 'avg']),
        'IncidentsOfTypeUnknown': collate.Aggregate(
            conditions_by_policy_type(
                'IncidentsOfTypeUnknown',
                AllegationOutcome.unknown.value),
            ['sum', 'avg']),
    }
def _feature_aggregations_space_time(self, engine):
    """Build the days-since-last-allegation aggregation.

    Args:
        engine: unused in this method.

    Returns:
        dict with one ``collate.Aggregate`` that requests the ``min`` of the
        day difference between ``'{collate_date}'`` and ``report_date``.
    """
    # '{collate_date}' is left as a literal placeholder here; presumably it
    # is substituted when the query is rendered -- confirm against caller.
    days_since_report = "EXTRACT( DAY FROM ('{collate_date}' - report_date))"

    last_allegation = collate.Aggregate(
        {"DaysSinceLastAllegation": days_since_report},
        ['min'])

    return {'DaysSinceLastAllegation': last_allegation}
def _feature_aggregations_space_time(self, engine):
    """Build the officer-age aggregation.

    Args:
        engine: unused in this method.

    Returns:
        dict with one ``collate.Aggregate`` that requests the ``max`` of the
        interval between ``'{collate_date}'`` and ``date_of_birth`` divided
        by 365.
    """
    # '{collate_date}' is left as a literal placeholder here; presumably it
    # is substituted when the query is rendered -- confirm against caller.
    age_expression = "EXTRACT( DAY FROM ('{collate_date}' - date_of_birth)/365)"

    officer_age = collate.Aggregate(
        {"OfficerAge": age_expression},
        ['max'])

    return {'OfficerAge': officer_age}
def _feature_aggregations_space_time(self, engine):
    """Build the days-since-last-completed-allegation aggregation.

    Args:
        engine: unused in this method.

    Returns:
        dict with one ``collate.Aggregate`` that requests the ``min`` of the
        day difference between ``'{collate_date}'`` and ``date_of_judgment``.
    """
    # '{collate_date}' is left as a literal placeholder here; presumably it
    # is substituted when the query is rendered -- confirm against caller.
    days_since_judgment = "EXTRACT(DAY FROM ('{collate_date}' - date_of_judgment))"

    last_completed = collate.Aggregate(
        {"DaysSinceLastCompletedAllegation": days_since_judgment},
        ['min'])

    return {'DaysSinceLastCompletedAllegation': last_completed}
def _feature_aggregations_space_time_lookback(self, engine):
    """Build the field-interview aggregations.

    Args:
        engine: passed through to ``self._lookup_values_conditions``.

    Returns:
        dict mapping a feature-group name to a ``collate.Aggregate``.
    """
    features = {}

    # NOTE(review): this sums event_id, whereas other event totals in this
    # code base use 'count' on event_id -- confirm that 'sum' is intended.
    features['FieldInterviews'] = collate.Aggregate(
        {"FieldInterviews": 'event_id'},
        ['sum'])

    # Hour of day shifted by 12.
    features['HourOfFieldInterviews'] = collate.Aggregate(
        {"HourOfFieldInterviews": "date_part('hour',event_datetime)-12"},
        ['avg'])

    features['FieldInterviewsByRace'] = collate.Aggregate(
        self._lookup_values_conditions(
            engine,
            column_code_name='interviewed_person_race',
            lookup_table='lookup_races',
            prefix='FieldInterviewsByRace'),
        ['sum', 'avg'])

    features['FieldInterviewsByOutcome'] = collate.Aggregate(
        self._lookup_values_conditions(
            engine,
            column_code_name='field_interview_outcome_code',
            lookup_table='lookup_field_interview_outcomes',
            prefix='FieldInterviewsByOutcome'),
        ['sum', 'avg'])

    # Boolean flags cast to 0/1 in SQL.
    features['FieldInterviewsWithFlag'] = collate.Aggregate(
        {
            "FieldInterviewsWithFlag_searched": 'searched_flag::int',
            "FieldInterviewsWithFlag_drugs": 'drugs_found_flag::int',
            "FieldInterviewsWithFlag_weapons": 'weapons_found_flag::int',
        },
        ['sum', 'avg'])

    features['InterviewsType'] = collate.Aggregate(
        self._lookup_values_conditions(
            engine,
            column_code_name='field_interview_type_code',
            lookup_table='lookup_field_interview_types',
            prefix='InterviewsType'),
        ['sum', 'avg'])

    # 'mode' gets an empty quantity; the expression is supplied as the third
    # positional argument (presumably the ordering expression -- confirm
    # against the collate.Aggregate signature in use).
    features['ModeHourOfFieldInterviews'] = collate.Aggregate(
        {"ModeHourOfFieldInterviews": ""},
        'mode',
        "date_part('hour',event_datetime)-12")

    return features
def _feature_aggregations_space_time_lookback(self, engine):
    """Build the dispatch feature aggregations from helper-derived categories.

    Args:
        engine: passed through to the ``self._lookup_values_conditions`` and
            ``self._group_category_conditions_str`` helpers.

    Returns:
        dict mapping a feature-group name to a ``collate.Aggregate``.
    """
    features = {}

    features['DispatchType'] = collate.Aggregate(
        self._lookup_values_conditions(
            engine,
            column_code_name='dispatch_final_type_code',
            lookup_table='lookup_dispatch_types',
            prefix='DispatchType'),
        ['sum', 'avg'])

    # Categories are derived from staging.dispatches.dispatch_category.
    features['DispatchInitiatiationType'] = collate.Aggregate(
        self._group_category_conditions_str(
            engine,
            column_name='dispatch_category',
            schema='staging',
            table='dispatches',
            prefix='DispatchInitiatiationType'),
        ['sum', 'avg'])

    # Divisions are derived from staging.dispatch_geo_time_officer; only the
    # sum is requested for these.
    features['DispatchDivision'] = collate.Aggregate(
        self._group_category_conditions_str(
            engine,
            column_name='division_id',
            schema='staging',
            table='dispatch_geo_time_officer',
            prefix='DispatchDivision'),
        ['sum'])

    return features
def _feature_aggregations_space_time_lookback(self, engine):
    """Build the arrest aggregations.

    Args:
        engine: passed through to ``self._lookup_values_conditions``.

    Returns:
        dict mapping a feature-group name to a ``collate.Aggregate``.
    """
    features = {}

    # Plain arrest count.
    features['Arrests'] = collate.Aggregate(
        {"Arrests": 'event_id'},
        ['count'])

    features['ArrestsOfType'] = collate.Aggregate(
        self._lookup_values_conditions(
            engine,
            column_code_name='arrest_type_code',
            lookup_table='lookup_arrest_types',
            prefix='ArrestsOfType'),
        ['sum', 'avg'])

    features['ArrestsON'] = collate.Aggregate(
        self._lookup_values_conditions(
            engine,
            column_code_name='arrest_day_of_week',
            lookup_table='lookup_days_of_week',
            prefix='ArrestsON'),
        ['sum', 'avg'])

    features['SuspectsArrestedOfRace'] = collate.Aggregate(
        self._lookup_values_conditions(
            engine,
            column_code_name='suspect_race_code',
            lookup_table='lookup_races',
            prefix='SuspectsArrestedOfRace'),
        ['sum', 'avg'])

    features['SuspectsArrestedOfEthnicity'] = collate.Aggregate(
        self._lookup_values_conditions(
            engine,
            column_code_name='suspect_ethnicity_code',
            lookup_table='lookup_ethnicities',
            prefix='SuspectsArrestedOfEthnicity'),
        ['sum', 'avg'])

    features['ArrestsCrimeType'] = collate.Aggregate(
        self._lookup_values_conditions(
            engine,
            column_code_name='ucr4_grouped_code',
            lookup_table='lookup_ucr4_grouped_dispatch_types',
            prefix='ArrestsCrimeType'),
        ['sum', 'avg'])

    return features
def _feature_aggregations(self, engine):
    """Build the use-of-force aggregations.

    Args:
        engine: passed through to ``self._lookup_values_conditions``.

    Returns:
        dict mapping a feature-group name to a ``collate.Aggregate``; every
        group requests only ``sum``.
    """
    features = {}

    features['UsesOfForceOfType'] = collate.Aggregate(
        self._lookup_values_conditions(
            engine,
            column_code_name='use_of_force_type_code',
            lookup_table='lookup_use_of_force_type',
            prefix='UsesOfForceOfType'),
        ['sum'])

    # Same lookup as above, with an extra fix_condition forwarded to the
    # helper.
    features['UnjustifiedUsesOfForceOfType'] = collate.Aggregate(
        self._lookup_values_conditions(
            engine,
            column_code_name='use_of_force_type_code',
            lookup_table='lookup_use_of_force_type',
            fix_condition='number_of_unjustified_allegations >0',
            prefix='UnjustifiedUsesOfForceOfType'),
        ['sum'])

    features['UnjustUOFInterventionsOfType'] = collate.Aggregate(
        self._lookup_values_conditions(
            engine,
            column_code_name='intervention_type_code',
            lookup_table='lookup_intervention_types',
            fix_condition='number_of_unjustified_allegations >0',
            prefix='UnjustUOFInterventionsOfType'),
        ['sum'])

    # suspect_injury is cast to 0/1 in SQL before summing.
    features['OFwithSuspectInjury'] = collate.Aggregate(
        {"OFwithSuspectInjury": '(suspect_injury)::int'},
        ['sum'])

    return features
def _feature_aggregations_space_time_lookback(self, engine):
    """Build the per-dispatch-type timing aggregations.

    All three groups use the same dispatch-type lookup and differ only in
    the ``prefix`` and the ``multiplier`` column handed to
    ``self._lookup_values_conditions_multiplier``.

    Args:
        engine: passed through to the helper.

    Returns:
        dict mapping a feature-group name to a ``collate.Aggregate``; every
        group requests ``sum`` and ``avg``.
    """
    features = {}

    features['DispTypeTravelTimeM'] = collate.Aggregate(
        self._lookup_values_conditions_multiplier(
            engine,
            column_code_name='dispatch_final_type_code',
            lookup_table='lookup_dispatch_types',
            prefix='DispTypeTravelTimeM',
            multiplier='travel_time_minutes'),
        ['sum', 'avg'])

    features['DispTypeResponseTimeM'] = collate.Aggregate(
        self._lookup_values_conditions_multiplier(
            engine,
            column_code_name='dispatch_final_type_code',
            lookup_table='lookup_dispatch_types',
            prefix='DispTypeResponseTimeM',
            multiplier='response_time_minutes'),
        ['sum', 'avg'])

    features['DispTypeTimeOnSceneM'] = collate.Aggregate(
        self._lookup_values_conditions_multiplier(
            engine,
            column_code_name='dispatch_final_type_code',
            lookup_table='lookup_dispatch_types',
            prefix='DispTypeTimeOnSceneM',
            multiplier='time_on_scene_minutes'),
        ['sum', 'avg'])

    return features
def test_distinct():
    """``distinct`` quantities render inside ``count(...)``, with or without a filter."""
    date_filter = {"when": "date < '2012-01-01'"}

    # (quantity, get_columns keyword arguments, expected SQL)
    cases = [
        ("distinct x", {},
         "count(distinct x)"),
        ("distinct x", date_filter,
         "count(distinct x) FILTER (WHERE date < '2012-01-01')"),
        ("distinct(x)", date_filter,
         "count(distinct (x)) FILTER (WHERE date < '2012-01-01')"),
        ("distinct(x,y)", date_filter,
         "count(distinct (x,y)) FILTER (WHERE date < '2012-01-01')"),
    ]

    for quantity, column_kwargs, expected_sql in cases:
        aggregate = collate.Aggregate(quantity, "count", {})
        columns = list(aggregate.get_columns(**column_kwargs))
        assert str(columns[0]) == expected_sql
def _feature_aggregations(self, engine):
    """Build the officer-characteristic aggregations.

    Args:
        engine: passed through to ``self._lookup_values_conditions``.

    Returns:
        dict mapping a feature-group name to a ``collate.Aggregate``; every
        group requests only ``max``.
    """
    features = {}

    features['DummyOfficerGender'] = collate.Aggregate(
        self._lookup_values_conditions(
            engine,
            column_code_name='gender_code',
            lookup_table='lookup_genders',
            prefix='DummyOfficerGender'),
        ['max'])

    features['DummyOfficerRace'] = collate.Aggregate(
        self._lookup_values_conditions(
            engine,
            column_code_name='race_code',
            lookup_table='lookup_races',
            prefix='DummyOfficerRace'),
        ['max'])

    features['DummyOfficerEthnicity'] = collate.Aggregate(
        self._lookup_values_conditions(
            engine,
            column_code_name='ethnicity_code',
            lookup_table='lookup_ethnicities',
            prefix='DummyOfficerEthnicity'),
        ['max'])

    # Disabled entry, kept for reference:
    # 'OfficerAge': collate.Aggregate(
    #     {"OfficerAge": "extract(day from '{date}'::timestamp - date_of_birth)/365"}, ['max']),

    features['DummyOfficerEducation'] = collate.Aggregate(
        self._lookup_values_conditions(
            engine,
            column_code_name='education_level_code',
            lookup_table='lookup_education_levels',
            prefix='DummyOfficerEducation'),
        ['max'])

    features['DummyOfficerMarital'] = collate.Aggregate(
        self._lookup_values_conditions(
            engine,
            column_code_name='marital_status_code',
            lookup_table='lookup_marital_statuses',
            prefix='DummyOfficerMarital'),
        ['max'])

    # Boolean flag cast to 0/1 in SQL.
    features['DummyOfficerMilitary'] = collate.Aggregate(
        {"DummyOfficerMilitary": 'military_service_flag::int'},
        ['max'])

    features['AcademyScore'] = collate.Aggregate(
        {"AcademyScore": 'score'},
        ['max'])

    features['DummyOfficerRank'] = collate.Aggregate(
        self._lookup_values_conditions(
            engine,
            column_code_name='rank_code',
            lookup_table='lookup_ranks',
            prefix='DummyOfficerRank'),
        ['max'])

    return features
def test_aggregate_imputation_lookup_all():
    """The ``all`` rule applies to ``avg``, while ``sum`` uses its own rule."""
    imputation_rules = {
        "coltype": "aggregate",
        "all": {"type": "zero"},
        "sum": {"type": "constant", "value": 3},
        "max": {"type": "mean"},
    }
    agg = collate.Aggregate("a", ["avg", "sum"], imputation_rules)

    # (output column, rule field, expected value)
    expectations = [
        ('a_avg', 'type', 'zero'),
        ('a_avg', 'coltype', 'aggregate'),
        ('a_sum', 'type', 'constant'),
        ('a_sum', 'value', 3),
        ('a_sum', 'coltype', 'aggregate'),
    ]

    for column_name, field, expected in expectations:
        assert agg.column_imputation_lookup()[column_name][field] == expected
def _feature_aggregations_space_time_lookback(self, engine):
    """Build the incident, complaint and suspension aggregations.

    Args:
        engine: passed through to ``self._lookup_values_conditions``.

    Returns:
        dict mapping a feature-group name to a ``collate.Aggregate``; every
        group requests ``sum`` and ``avg``.
    """
    features = {}

    features['InterventionsOfType'] = collate.Aggregate(
        self._lookup_values_conditions(
            engine,
            column_code_name='intervention_type_code',
            lookup_table='lookup_intervention_types',
            prefix='InterventionsOfType'),
        ['sum', 'avg'])

    # Hard-coded policy types, each cast to a 0/1 indicator in SQL.
    features['IncidentsOfType'] = collate.Aggregate(
        {
            "IncidentsOfType_use_of_force": "(department_defined_policy_type = 'Use Of Force')::int",
            "IncidentsOfType_tdd": "(department_defined_policy_type = 'TDD')::int",
            "IncidentsOfType_complaint": "(department_defined_policy_type = 'Complaint')::int",
            "IncidentsOfType_pursuit": "(department_defined_policy_type = 'Pursuit')::int",
            "IncidentsOfType_dof": "(department_defined_policy_type = 'DOF')::int",
            "IncidentsOfType_raid_search": "(department_defined_policy_type = 'Raid And Search')::int",
            "IncidentsOfType_injury": "(department_defined_policy_type = 'Injury')::int",
            "IncidentsOfType_icd": "(department_defined_policy_type = 'ICD')::int",
            "IncidentsOfType_nfsi": "(department_defined_policy_type = 'NFSI')::int",
            "IncidentsOfType_accident": "(department_defined_policy_type = 'Accident')::int",
        },
        ['sum', 'avg'])

    features['ComplaintsTypeSource'] = collate.Aggregate(
        self._lookup_values_conditions(
            engine,
            column_code_name='origination_type_code',
            lookup_table='lookup_complaint_origins',
            prefix='ComplaintsTypeSource'),
        ['sum', 'avg'])

    # Indicator per suspension kind: 1 when any hours were recorded.
    features['SuspensionsOfType'] = collate.Aggregate(
        {
            "SuspensionsOfType_active": "(hours_active_suspension > 0)::int",
            "SuspensionsOfType_inactive": "(hours_inactive_suspension > 0)::int",
        },
        ['sum', 'avg'])

    # Raw suspension hours per suspension kind.
    features['HoursSuspensionsOfType'] = collate.Aggregate(
        {
            "HoursSuspensionsOfType_active": "hours_active_suspension",
            "HoursSuspensionsOfType_inactive": "hours_inactive_suspension",
        },
        ['sum', 'avg'])

    features['AllAllegations'] = collate.Aggregate(
        {"AllAllegations": "number_of_allegations"},
        ['sum', 'avg'])

    return features
def _feature_aggregations_space_time_lookback(self, engine):
    """Build the compliments aggregation.

    Args:
        engine: unused in this method.

    Returns:
        dict with one ``collate.Aggregate`` that requests the ``count`` of
        ``event_id``.
    """
    compliments = collate.Aggregate(
        {"Compliments": 'event_id'},
        ['count'])

    return {'Compliments': compliments}
def test_aggregate_tuple_quantity_when():
    """A tuple quantity with a ``when`` filter renders as a filtered two-argument call."""
    correlation = collate.Aggregate(("x", "y"), "corr", {})

    columns = list(correlation.get_columns(when="date < '2012-01-01'"))
    expected_sql = "corr(x, y) FILTER (WHERE date < '2012-01-01')"

    assert str(columns[0]) == expected_sql
def test_aggregate_tuple_quantity():
    """A tuple quantity renders as a two-argument aggregate call."""
    correlation = collate.Aggregate(("x", "y"), "corr", {})

    columns = list(correlation.get_columns())

    assert str(columns[0]) == "corr(x, y)"
def test_ordered_aggregate_when():
    """An ordered-set aggregate places FILTER after the WITHIN GROUP clause."""
    mode_of_x = collate.Aggregate("", "mode", {}, "x")

    columns = list(mode_of_x.get_columns(when="date < '2012-01-01'"))
    expected_sql = (
        "mode() WITHIN GROUP (ORDER BY x) FILTER (WHERE date < '2012-01-01')")

    assert str(columns[0]) == expected_sql
def test_ordered_aggregate():
    """An ordered-set aggregate renders WITHIN GROUP and is named after its order expression."""
    mode_of_x = collate.Aggregate("", "mode", {}, "x")

    # get_columns() is invoked once per assertion, as in the original test.
    rendered_columns = list(mode_of_x.get_columns())
    assert str(rendered_columns[0]) == "mode() WITHIN GROUP (ORDER BY x)"

    named_columns = list(mode_of_x.get_columns())
    assert named_columns[0].name == "x_mode"
def test_aggregate_when():
    """A ``when`` condition renders as a FILTER (WHERE ...) clause."""
    row_count = collate.Aggregate("1", "count", {})

    columns = list(row_count.get_columns(when="date < '2012-01-01'"))
    expected_sql = "count(1) FILTER (WHERE date < '2012-01-01')"

    assert str(columns[0]) == expected_sql
def test_aggregate():
    """A plain aggregate renders as ``function(quantity)``."""
    row_count = collate.Aggregate("*", "count", {})

    columns = list(row_count.get_columns())

    assert str(columns[0]) == "count(*)"
def test_aggregate_format_kwargs():
    """``format_kwargs`` values are substituted into the quantity's placeholders."""
    earliest_gap = collate.Aggregate("'{collate_date}' - date", "min", {})

    substitutions = {"collate_date": "2012-01-01"}
    columns = list(earliest_gap.get_columns(format_kwargs=substitutions))

    assert str(columns[0]) == "min('2012-01-01' - date)"