def get_aggregates(self, date, delta):
    """Assemble the aggregate specifications for prenatal visit records.

    Neither ``date`` nor ``delta`` is read by this implementation.

    Returns:
        list: the Count and Aggregate objects built below.
    """
    prenatal = self.inputs[0].get_result()

    specs = [Count()]
    specs.append(
        Aggregate(days('visit_d', 'date_of_birth'), ['min', 'max'], 'visit'))
    # Sum every column that select_regexes picks out of the prenatal
    # result for the service_.* pattern.
    specs.append(
        Aggregate(list(select_regexes(prenatal, ['service_.*'])),
                  'sum', fname=False))

    # Maximum of each pregnancy-history column, under a readable name.
    history_columns = (
        ('preg_nbr_n', 'previous_pregnancies'),
        ('lv_brth_n', 'previous_births'),
        ('othr_trm_n', 'previous_terminations'),
    )
    for column, label in history_columns:
        specs.append(Aggregate(column, 'max', label, fname=False))

    # Smoking: whether any row has the flag set to 'Y', then the maximum
    # of the cigarettes column.
    specs.append(Aggregate(lambda visits: visits.smk3_mth_f == 'Y',
                           'any', 'smoked_3mo', fname=False))
    specs.append(Aggregate('cig3_day_n', 'max', 'cigarettes_per_day',
                           fname=False))

    # Drinking: same pattern as smoking, with two maximum columns.
    specs.append(Aggregate(lambda visits: visits.drk3_mth_f == 'Y',
                           'any', 'drank_3mo', fname=False))
    specs.append(Aggregate('dr_dy_wk_n', 'max', 'days_drank_per_week',
                           fname=False))
    specs.append(Aggregate('drnk_day_n', 'max', 'drinks_per_day',
                           fname=False))
    return specs
def get_aggregates(self, date, delta):
    """Assemble the aggregate specifications for prenatal visit records.

    Neither ``date`` nor ``delta`` is read by this implementation.

    Returns:
        list: the Count and Aggregate objects built below.
    """
    specs = [Count()]
    specs.append(
        Aggregate(days('visit_d', 'date_of_birth'), ['min', 'max'], 'visit'))
    # Collect the distinct service type codes into a set.
    specs.append(Aggregate('serv_typ_c', lambda codes: set(codes),
                           'service', fname=False))

    # Maximum of each pregnancy-history column, under a readable name.
    history_columns = (
        ('preg_nbr_n', 'previous_pregnancies'),
        ('lv_brth_n', 'previous_births'),
        ('othr_trm_n', 'previous_terminations'),
    )
    for column, label in history_columns:
        specs.append(Aggregate(column, 'max', label, fname=False))

    # Smoking: whether any row has the flag set to 'Y', then the maximum
    # of the cigarettes column.
    specs.append(Aggregate(lambda visits: visits.smk3_mth_f == 'Y',
                           'any', 'smoked_3mo', fname=False))
    specs.append(Aggregate('cig3_day_n', 'max', 'cigarettes_per_day',
                           fname=False))

    # Drinking: same pattern as smoking, with two maximum columns.
    specs.append(Aggregate(lambda visits: visits.drk3_mth_f == 'Y',
                           'any', 'drank_3mo', fname=False))
    specs.append(Aggregate('dr_dy_wk_n', 'max', 'days_drank_per_week',
                           fname=False))
    specs.append(Aggregate('drnk_day_n', 'max', 'drinks_per_day',
                           fname=False))

    # Collect the distinct clinic ids into a set.
    specs.append(Aggregate('clinicid_i', lambda clinics: set(clinics),
                           'clinic', fname=False))
    return specs
def get_aggregates(self, date, delta):
    """Build the aggregate specifications for blood lead tests.

    Args:
        date: reference date. Used to find the start of the window and,
            for the 'all' window, to measure days since tests.
        delta: window specifier. The literal string 'all' adds the
            days-since aggregates; any other value is handed to
            ``data.parse_delta`` and adds the incidence aggregates.

    Returns:
        list: Count, Aggregate and Fraction objects (plus the quotient
        objects produced by dividing a Count by an Aggregate).
    """
    bll_stats = ['mean', 'median', 'max', 'min', 'std']
    kid_count = Aggregate('kid_id', 'nunique', name='kid_count', fname=False)

    aggregates = [
        Count(),
        Aggregate('bll', list(bll_stats)),
        # bll restricted to the rows flagged by the ``increase`` column
        Aggregate(lambda t: t.bll.where(t.increase), list(bll_stats),
                  'increase_bll'),
        Count(lambda t: t.bll <= 2, 'bll2', prop=True),
        # prevalences
        Fraction(Count(['first_bll6', 'first_bll10']), kid_count,
                 include_numerator=True, include_denominator=True),
    ]

    if delta == 'all':
        aggregates.extend([
            Aggregate(days('date', date), ['min', 'max'], 'days_since_test'),
            Aggregate([
                lambda t: (date - t.date.where(t.bll >= 6)) / day,
                lambda t: (date - t.date.where(t.bll >= 10)) / day,
            ], ['min', 'max'], ['days_since_bll6', 'days_since_bll10']),
        ])
    else:
        # incidences
        start_date = date - data.parse_delta(delta)

        # Denominators: distinct kids whose first elevated sample is not
        # before the window start. A comparison against a missing date
        # evaluates to False rather than to a missing value, so calling
        # fillna(True) on the comparison result never changes anything.
        # Kids with no first-elevated date are therefore included
        # explicitly with isnull().
        no_bll6_count = Aggregate(
            lambda k: k.kid_id.where(
                (k.first_bll6_sample_date >= start_date)
                | k.first_bll6_sample_date.isnull()),
            'nunique', name='no_bll6_count', fname=False)
        no_bll10_count = Aggregate(
            lambda k: k.kid_id.where(
                (k.first_bll10_sample_date >= start_date)
                | k.first_bll10_sample_date.isnull()),
            'nunique', name='no_bll10_count', fname=False)

        aggregates.extend([
            no_bll6_count,
            no_bll10_count,
            Count('first_bll6') / no_bll6_count,
            Count('first_bll10') / no_bll10_count,
        ])

    return aggregates
def get_aggregates(self, date, delta):
    """Build the aggregate specifications for blood lead tests.

    Args:
        date: reference date; only read when ``delta`` is 'all'.
        delta: window specifier. The literal string 'all' adds the
            days-since aggregates; other values add nothing extra.

    Returns:
        list: Count, Aggregate and Fraction objects.
    """
    kid_count = Aggregate('kid_id', 'nunique', name='kid_count', fname=False)

    specs = [Count()]
    specs.append(Aggregate('bll', ['mean', 'median', 'max', 'min', 'std']))
    specs.append(Count(lambda tests: tests.bll <= 2, 'bll2', prop=True))
    # first_bll6 / first_bll10 counts relative to the distinct kid count
    specs.append(
        Fraction(Count(['first_bll6', 'first_bll10']), kid_count,
                 include_numerator=True, include_denominator=True))

    if delta != 'all':
        return specs

    specs.append(
        Aggregate(days('date', date), ['min', 'max'], 'days_since_test'))
    # Time elapsed since tests at or above each threshold, in units of
    # ``day``.
    since_threshold = [
        lambda tests: (date - tests.date.where(tests.bll >= 6)) / day,
        lambda tests: (date - tests.date.where(tests.bll >= 10)) / day,
    ]
    specs.append(
        Aggregate(since_threshold, ['min', 'max'],
                  ['days_since_bll6', 'days_since_bll10']))
    return specs
def get_aggregates(self, date, index, delta):
    """Build the aggregate specifications for kid-address records.

    Args:
        date: reference date; only read when ``delta`` is 'all'.
        index: when equal to 'kid', a short list of 'max' aggregates is
            returned and nothing else is built.
        delta: window specifier; the literal string 'all' adds the
            date-of-birth aggregate.

    Returns:
        list: Count, Aggregate and Fraction objects.
    """
    if index == 'kid':
        kid_level = [
            Aggregate(['test_address_count', 'address_count', 'test_count'],
                      'max', fname=False),
            Aggregate(['max_bll'], 'max', fname=False),
        ]
        # This and all other wic aggregates are commented out because they
        # can't be lagged and they're not useful for predicting poisoning:
        # Aggregate(lambda k: k.last_wic_date == k.address_wic_max_date,
        #           'any', 'last_wic_address', fname=False),
        # Aggregate(['address_wic_mother', 'address_wic_infant'], 'any',
        #           fname=False),
        # Aggregate([days('address_wic_max_date', date),
        #            days('address_wic_min_date', date),
        #            days('last_wic_date', date),
        #            days('first_wic_date', date)],
        #           ['max'], ['address_wic_min_date', 'address_wic_max_date',
        #                     'last_wic_date', 'first_wic_date'], fname=False)
        return kid_level

    # A kid is flagged when (last sample date - date of birth) / day
    # exceeds 365 * 2, or when max_bll is at least 6.
    sample_2y = lambda kids: (
        ((kids.last_sample_date - kids.date_of_birth) / day > 365 * 2)
        | (kids.max_bll >= 6))
    counts = Count([np.float32(1), sample_2y], ['kid', 'kid_sample_2y'])

    summary_stats = ['median', 'mean', 'min', 'max']
    bll_stats = ['mean', 'median', 'min', 'max']

    aggregates = [counts]
    aggregates.append(
        Aggregate(['test_address_count', 'test_count', 'address_count'],
                  summary_stats))
    aggregates.append(
        Count([
            lambda kids: kids.address_test_min_date.notnull(),
            lambda kids: kids.first_sample_date.notnull(),
        ], prop=True, name=['tested_here', 'tested_ever']))
    # Disabled wic aggregates:
    # Count(lambda k: k.first_wic_date.notnull(), prop=True, name='wic'),
    # Count([lambda k: k.address_wic_min_date.notnull()
    #            & k.address_test_min_date.notnull(),
    #        lambda k: k.address_wic_min_date.notnull()
    #            & k.first_sample_date.notnull()],
    #       name=['wic_tested_here', 'wic_tested_ever'],
    #       prop=lambda k: k.first_wic_date.notnull(), prop_name='wic'),
    aggregates.append(
        Aggregate(
            [
                days('address_min_date', 'address_max_date'),
                # days('address_wic_min_date', 'address_wic_max_date'),
                days('address_test_min_date', 'address_test_max_date'),
            ],
            ['mean'],
            [
                'address_total_time',
                # 'address_wic_time',
                'address_test_time',
            ]))

    # The first of these are kid level, not address-kid level. That means
    # kids get double counted when aggregated to above the address level
    # if they lived in multiple addresses within that level, e.g. a
    # census tract. Oh well.
    aggregates.append(
        Aggregate([
            'max_bll', 'avg_bll', 'cumulative_bll', 'avg_cumulative_bll',
            'mean_bll', 'address_max_bll', 'address_mean_bll',
        ], bll_stats))

    # ebll past, present, future, ever: count the number of kids who moved
    # into this address in the period defined by date and delta and who
    # were poisoned before, during, after or ever relative to their time
    # living there.
    ever = Count([
        lambda kids: kids.first_bll6_sample_date.notnull(),
        lambda kids: kids.first_bll10_sample_date.notnull(),
    ], ['bll6_ever', 'bll10_ever'])
    aggregates.append(Fraction(ever, counts, include_numerator=True))

    future = Count([
        lambda kids: kids.first_bll6_sample_date > kids.address_max_date,
        lambda kids: kids.first_bll10_sample_date > kids.address_max_date,
    ], ['bll6_future', 'bll10_future'])
    aggregates.append(Fraction(future, counts, include_numerator=True))

    past = Count([
        lambda kids: kids.first_bll6_sample_date < kids.address_min_date,
        lambda kids: kids.first_bll10_sample_date < kids.address_min_date,
    ], ['bll6_past', 'bll10_past'])
    aggregates.append(Fraction(past, counts, include_numerator=True))

    present = Count([
        lambda kids: kids.first_bll6_sample_date.between(
            kids.address_min_date, kids.address_max_date),
        lambda kids: kids.first_bll10_sample_date.between(
            kids.address_min_date, kids.address_max_date),
    ], ['bll6_present', 'bll10_present'])
    aggregates.append(Fraction(present, counts, include_numerator=True))

    aggregates.append(
        Aggregate('last_name', 'nunique', fname='count', astype=str))
    # TODO: min_last_sample_age cutoffs

    if delta == 'all':
        # Disabled wic aggregate:
        # Aggregate(days('address_wic_min_date', date), ['min', 'max'],
        #           'days_since_wic'),
        aggregates.append(
            Aggregate(days('date_of_birth', date), ['min', 'max', 'mean'],
                      'date_of_birth'))
    return aggregates
def get_aggregates(self, date, index, delta):
    """Build the aggregate specifications for kid-address records.

    Args:
        date: reference date for the days(...) columns measured against it.
        index: when equal to 'kid', a short kid-level list is returned and
            nothing else is built.
        delta: window specifier; the literal string 'all' adds the
            days-since-wic and date-of-birth aggregates.

    Returns:
        list: Count, Aggregate and Fraction objects.
    """
    if index == 'kid':
        wic_day_columns = [
            days('address_wic_max_date', date),
            days('address_wic_min_date', date),
            days('last_wic_date', date),
            days('first_wic_date', date),
        ]
        # NOTE(review): the first two labels are in min/max order while the
        # first two columns are in max/min order. Confirm whether the
        # crossing is intended before relying on these names.
        wic_day_labels = [
            'address_wic_min_date', 'address_wic_max_date',
            'last_wic_date', 'first_wic_date',
        ]
        return [
            Aggregate(['address_count', 'test_count'], 'max', fname=False),
            Aggregate(['max_bll'], 'max', fname=False),
            Aggregate(
                lambda kids: kids.last_wic_date == kids.address_wic_max_date,
                'any', 'last_wic_address', fname=False),
            Aggregate(['address_wic_mother', 'address_wic_infant'], 'any',
                      fname=False),
            Aggregate(wic_day_columns, ['max'], wic_day_labels, fname=False),
        ]

    # A kid is flagged when (last sample date - date of birth) / day
    # exceeds 365 * 2, or when max_bll is at least 6.
    sample_2y = lambda kids: (
        ((kids.last_sample_date - kids.date_of_birth) / day > 365 * 2)
        | (kids.max_bll >= 6))
    counts = Count([np.float32(1), sample_2y], ['kid', 'kid_sample_2y'])

    aggregates = [counts]
    aggregates.append(
        Aggregate(['address_count', 'test_count'],
                  ['median', 'mean', 'min', 'max']))
    aggregates.append(
        Count([
            lambda kids: kids.address_test_min_date.notnull(),
            lambda kids: kids.first_sample_date.notnull(),
            lambda kids: kids.first_wic_date.notnull(),
        ], prop=True, name=['tested_here', 'tested_ever', 'wic']))
    # Tested counts among wic kids; the parent predicate is "has a first
    # wic date".
    aggregates.append(
        Count([
            lambda kids: (kids.address_wic_min_date.notnull()
                          & kids.address_test_min_date.notnull()),
            lambda kids: (kids.address_wic_min_date.notnull()
                          & kids.first_sample_date.notnull()),
        ], name=['wic_tested_here', 'wic_tested_ever'],
            parent=lambda kids: kids.first_wic_date.notnull()))
    aggregates.append(
        Aggregate([
            days('address_min_date', 'address_max_date'),
            days('address_wic_min_date', 'address_wic_max_date'),
            days('address_test_min_date', 'address_test_max_date'),
        ], ['mean'], [
            'address_total_time', 'address_wic_time', 'address_test_time',
        ]))
    aggregates.append(
        Aggregate(
            ['max_bll', 'mean_bll', 'address_max_bll', 'address_mean_bll'],
            ['mean', 'median', 'min', 'max']))

    # First bll6 / bll10 sample dates relative to the address date range:
    # ever (any date), future (after the max date), past (before the min
    # date) and present (between the two), each as a fraction of counts.
    ever = Count([
        lambda kids: kids.first_bll6_sample_date.notnull(),
        lambda kids: kids.first_bll10_sample_date.notnull(),
    ], ['bll6_ever', 'bll10_ever'])
    aggregates.append(Fraction(ever, counts, include_numerator=True))

    future = Count([
        lambda kids: kids.first_bll6_sample_date > kids.address_max_date,
        lambda kids: kids.first_bll10_sample_date > kids.address_max_date,
    ], ['bll6_future', 'bll10_future'])
    aggregates.append(Fraction(future, counts, include_numerator=True))

    past = Count([
        lambda kids: kids.first_bll6_sample_date < kids.address_min_date,
        lambda kids: kids.first_bll10_sample_date < kids.address_min_date,
    ], ['bll6_past', 'bll10_past'])
    aggregates.append(Fraction(past, counts, include_numerator=True))

    present = Count([
        lambda kids: kids.first_bll6_sample_date.between(
            kids.address_min_date, kids.address_max_date),
        lambda kids: kids.first_bll10_sample_date.between(
            kids.address_min_date, kids.address_max_date),
    ], ['bll6_present', 'bll10_present'])
    aggregates.append(Fraction(present, counts, include_numerator=True))

    aggregates.append(
        Aggregate('last_name', 'nunique', fname='count', astype=str))
    # TODO: min_last_sample_age cutoffs

    if delta == 'all':
        aggregates.append(
            Aggregate(days('address_wic_min_date', date), ['min', 'max'],
                      'days_since_wic'))
        aggregates.append(
            Aggregate(days('date_of_birth', date), ['min', 'max', 'mean'],
                      'date_of_birth'))
    return aggregates
def get_aggregates(self, date, index, delta):
    """Return the aggregate specifications for kid-address records.

    Args:
        date: reference date for the days(...) columns measured against it.
        index: the value 'kid' selects a short kid-level list, returned
            immediately.
        delta: window specifier; the literal string 'all' appends the
            days-since-wic and date-of-birth aggregates.

    Returns:
        list: Count, Aggregate and Fraction objects.
    """
    if index == 'kid':
        kid_level = []
        kid_level.append(
            Aggregate(['address_count', 'test_count'], 'max', fname=False))
        kid_level.append(Aggregate(['max_bll'], 'max', fname=False))
        kid_level.append(
            Aggregate(lambda row: row.last_wic_date == row.address_wic_max_date,
                      'any', 'last_wic_address', fname=False))
        kid_level.append(
            Aggregate(['address_wic_mother', 'address_wic_infant'], 'any',
                      fname=False))
        # NOTE(review): the columns start max, min but the labels start
        # min, max. Confirm whether that crossing is deliberate.
        kid_level.append(
            Aggregate(
                [
                    days('address_wic_max_date', date),
                    days('address_wic_min_date', date),
                    days('last_wic_date', date),
                    days('first_wic_date', date),
                ],
                ['max'],
                [
                    'address_wic_min_date', 'address_wic_max_date',
                    'last_wic_date', 'first_wic_date',
                ],
                fname=False))
        return kid_level

    # Flag rows where (last sample date - date of birth) / day exceeds
    # 365 * 2, or where max_bll is at least 6.
    sample_2y = lambda row: (
        ((row.last_sample_date - row.date_of_birth) / day > 365 * 2)
        | (row.max_bll >= 6))
    counts = Count([np.float32(1), sample_2y], ['kid', 'kid_sample_2y'])

    tested = Count(
        [
            lambda row: row.address_test_min_date.notnull(),
            lambda row: row.first_sample_date.notnull(),
            lambda row: row.first_wic_date.notnull(),
        ],
        prop=True, name=['tested_here', 'tested_ever', 'wic'])
    wic_tested = Count(
        [
            lambda row: (row.address_wic_min_date.notnull()
                         & row.address_test_min_date.notnull()),
            lambda row: (row.address_wic_min_date.notnull()
                         & row.first_sample_date.notnull()),
        ],
        name=['wic_tested_here', 'wic_tested_ever'],
        parent=lambda row: row.first_wic_date.notnull())
    address_times = Aggregate(
        [
            days('address_min_date', 'address_max_date'),
            days('address_wic_min_date', 'address_wic_max_date'),
            days('address_test_min_date', 'address_test_max_date'),
        ],
        ['mean'],
        ['address_total_time', 'address_wic_time', 'address_test_time'])

    # First bll6 / bll10 sample dates compared with the address date
    # range, each taken as a fraction of ``counts``.
    bll_ever = Fraction(
        Count([
            lambda row: row.first_bll6_sample_date.notnull(),
            lambda row: row.first_bll10_sample_date.notnull(),
        ], ['bll6_ever', 'bll10_ever']),
        counts, include_numerator=True)
    bll_future = Fraction(
        Count([
            lambda row: row.first_bll6_sample_date > row.address_max_date,
            lambda row: row.first_bll10_sample_date > row.address_max_date,
        ], ['bll6_future', 'bll10_future']),
        counts, include_numerator=True)
    bll_past = Fraction(
        Count([
            lambda row: row.first_bll6_sample_date < row.address_min_date,
            lambda row: row.first_bll10_sample_date < row.address_min_date,
        ], ['bll6_past', 'bll10_past']),
        counts, include_numerator=True)
    bll_present = Fraction(
        Count([
            lambda row: row.first_bll6_sample_date.between(
                row.address_min_date, row.address_max_date),
            lambda row: row.first_bll10_sample_date.between(
                row.address_min_date, row.address_max_date),
        ], ['bll6_present', 'bll10_present']),
        counts, include_numerator=True)

    aggregates = [
        counts,
        Aggregate(['address_count', 'test_count'],
                  ['median', 'mean', 'min', 'max']),
        tested,
        wic_tested,
        address_times,
        Aggregate(
            ['max_bll', 'mean_bll', 'address_max_bll', 'address_mean_bll'],
            ['mean', 'median', 'min', 'max']),
        bll_ever,
        bll_future,
        bll_past,
        bll_present,
        Aggregate('last_name', 'nunique', fname='count', astype=str),
        # TODO: min_last_sample_age cutoffs
    ]

    if delta == 'all':
        aggregates += [
            Aggregate(days('address_wic_min_date', date), ['min', 'max'],
                      'days_since_wic'),
            Aggregate(days('date_of_birth', date), ['min', 'max', 'mean'],
                      'date_of_birth'),
        ]
    return aggregates