Ejemplo n.º 1
0
    def get_aggregates(self, date, delta):
        """Return the aggregate definitions for this step.

        The result of ``self.inputs[0]`` is only used to choose, through
        ``select_regexes``, the ``service_.*`` columns that get summed.
        ``date`` and ``delta`` are not read by this method.

        Returns:
            A list starting with a ``Count`` followed by ``Aggregate``
            definitions.
        """
        visits = self.inputs[0].get_result()

        # Definitions that do not follow the (column, function, name) shape.
        leading = [
            Count(),
            Aggregate(days('visit_d', 'date_of_birth'), ['min', 'max'],
                      'visit'),
            Aggregate(list(select_regexes(visits, ['service_.*'])),
                      'sum',
                      fname=False),
        ]

        # (column, function, output name); each is built with fname=False.
        named_specs = [
            ('preg_nbr_n', 'max', 'previous_pregnancies'),
            ('lv_brth_n', 'max', 'previous_births'),
            ('othr_trm_n', 'max', 'previous_terminations'),
            (lambda row: row.smk3_mth_f == 'Y', 'any', 'smoked_3mo'),
            ('cig3_day_n', 'max', 'cigarettes_per_day'),
            (lambda row: row.drk3_mth_f == 'Y', 'any', 'drank_3mo'),
            ('dr_dy_wk_n', 'max', 'days_drank_per_week'),
            ('drnk_day_n', 'max', 'drinks_per_day'),
        ]
        named = [
            Aggregate(column, function, name, fname=False)
            for column, function, name in named_specs
        ]

        return leading + named
Ejemplo n.º 2
0
    def get_aggregates(self, date, delta):
        """Return the aggregate definitions for this step.

        ``date`` and ``delta`` are not read by this method.

        Returns:
            A list starting with a ``Count`` and the min/max ``visit`` day
            aggregate, followed by one named ``Aggregate`` per entry of the
            spec table below.
        """
        result = [
            Count(),
            Aggregate(days('visit_d', 'date_of_birth'), ['min', 'max'],
                      'visit'),
        ]

        # (column, function, output name); each is built with fname=False.
        specs = (
            ('serv_typ_c', lambda codes: set(codes), 'service'),
            ('preg_nbr_n', 'max', 'previous_pregnancies'),
            ('lv_brth_n', 'max', 'previous_births'),
            ('othr_trm_n', 'max', 'previous_terminations'),
            (lambda row: row.smk3_mth_f == 'Y', 'any', 'smoked_3mo'),
            ('cig3_day_n', 'max', 'cigarettes_per_day'),
            (lambda row: row.drk3_mth_f == 'Y', 'any', 'drank_3mo'),
            ('dr_dy_wk_n', 'max', 'days_drank_per_week'),
            ('drnk_day_n', 'max', 'drinks_per_day'),
            ('clinicid_i', lambda ids: set(ids), 'clinic'),
        )
        result.extend(
            Aggregate(column, function, name, fname=False)
            for column, function, name in specs)

        return result
Ejemplo n.º 3
0
    def get_aggregates(self, date, delta):
        """Return the aggregate definitions over ``bll`` test rows.

        Always included: a row count, summary statistics of ``bll`` (overall
        and restricted to rows where ``increase`` is true), a ``bll <= 2``
        count built with ``prop=True``, and the ``first_bll6`` /
        ``first_bll10`` counts as a fraction of the distinct ``kid_id`` count.

        Args:
            date: reference date used for the days-since aggregates and as
                the end of the incidence window.
            delta: ``'all'`` adds the days-since aggregates; any other value
                is passed to ``data.parse_delta`` and adds the incidence
                aggregates for the window starting at ``date - delta``.

        Returns:
            A list of ``Count``/``Aggregate``/``Fraction`` definitions.
        """
        kid_count = Aggregate('kid_id',
                              'nunique',
                              name='kid_count',
                              fname=False)

        aggregates = [
            Count(),
            Aggregate('bll', ['mean', 'median', 'max', 'min', 'std']),
            Aggregate(lambda t: t.bll.where(t.increase),
                      ['mean', 'median', 'max', 'min', 'std'], 'increase_bll'),
            Count(lambda t: t.bll <= 2, 'bll2', prop=True),
            # prevalences
            Fraction(Count(['first_bll6', 'first_bll10']),
                     kid_count,
                     include_numerator=True,
                     include_denominator=True),
        ]

        if delta == 'all':
            aggregates.extend([
                Aggregate(days('date', date), ['min', 'max'],
                          'days_since_test'),
                Aggregate([
                    lambda t: (date - t.date.where(t.bll >= 6)) / day,
                    lambda t: (date - t.date.where(t.bll >= 10)) / day,
                ], ['min', 'max'], ['days_since_bll6', 'days_since_bll10']),
            ])
        else:
            # incidences
            start_date = date - data.parse_delta(delta)

            # Denominators: distinct kids whose first elevated sample is
            # missing or not earlier than the window start. The missing case
            # is tested explicitly because a comparison against a missing
            # date evaluates to False, so a fillna(True) applied to the
            # comparison result would never fire.
            no_bll6_count = Aggregate(
                lambda k: k.kid_id.where(
                    k.first_bll6_sample_date.isnull()
                    | (k.first_bll6_sample_date >= start_date)),
                'nunique',
                name='no_bll6_count',
                fname=False)
            no_bll10_count = Aggregate(
                lambda k: k.kid_id.where(
                    k.first_bll10_sample_date.isnull()
                    | (k.first_bll10_sample_date >= start_date)),
                'nunique',
                name='no_bll10_count',
                fname=False)

            aggregates.extend([
                no_bll6_count, no_bll10_count,
                Count('first_bll6') / no_bll6_count,
                Count('first_bll10') / no_bll10_count,
            ])

        return aggregates
Ejemplo n.º 4
0
    def get_aggregates(self, date, delta):
        """Return the aggregate definitions for this step.

        ``date`` and ``delta`` are not read by this method.

        Returns:
            A list holding a ``Count``, the min/max ``visit`` day aggregate
            and one named ``Aggregate`` per row of ``named_specs``.
        """
        head = [
            Count(),
            Aggregate(days('visit_d', 'date_of_birth'), ['min', 'max'],
                      'visit'),
        ]

        # Rows are (column, function, output name). All of them share
        # fname=False, so only the varying parts are listed.
        named_specs = [
            ('serv_typ_c', lambda values: set(values), 'service'),
            ('preg_nbr_n', 'max', 'previous_pregnancies'),
            ('lv_brth_n', 'max', 'previous_births'),
            ('othr_trm_n', 'max', 'previous_terminations'),
            (lambda rec: rec.smk3_mth_f == 'Y', 'any', 'smoked_3mo'),
            ('cig3_day_n', 'max', 'cigarettes_per_day'),
            (lambda rec: rec.drk3_mth_f == 'Y', 'any', 'drank_3mo'),
            ('dr_dy_wk_n', 'max', 'days_drank_per_week'),
            ('drnk_day_n', 'max', 'drinks_per_day'),
            ('clinicid_i', lambda values: set(values), 'clinic'),
        ]

        tail = []
        for column, function, name in named_specs:
            tail.append(Aggregate(column, function, name, fname=False))

        return head + tail
Ejemplo n.º 5
0
    def get_aggregates(self, date, delta):
        """Return the aggregate definitions over ``bll`` test rows.

        Args:
            date: reference date for the days-since aggregates.
            delta: the days-since aggregates are only added when this
                equals ``'all'``.

        Returns:
            A list of ``Count``/``Aggregate``/``Fraction`` definitions.
        """
        distinct_kids = Aggregate('kid_id',
                                  'nunique',
                                  name='kid_count',
                                  fname=False)

        result = [
            Count(),
            Aggregate('bll', ['mean', 'median', 'max', 'min', 'std']),
            Count(lambda t: t.bll <= 2, 'bll2', prop=True),
            Fraction(Count(['first_bll6', 'first_bll10']),
                     distinct_kids,
                     include_numerator=True,
                     include_denominator=True),
        ]

        if delta != 'all':
            return result

        # Only reached when delta == 'all'.
        result.append(
            Aggregate(days('date', date), ['min', 'max'], 'days_since_test'))
        result.append(
            Aggregate([
                lambda t: (date - t.date.where(t.bll >= 6)) / day,
                lambda t: (date - t.date.where(t.bll >= 10)) / day,
            ], ['min', 'max'], ['days_since_bll6', 'days_since_bll10']))
        return result
Ejemplo n.º 6
0
    def get_aggregates(self, date, index, delta):
        """Return the aggregate definitions for the requested index level.

        WIC-based aggregates are deliberately not produced at any level:
        they cannot be lagged and they are not useful for predicting
        poisoning.

        Args:
            date: reference date; only read when ``delta == 'all'``.
            index: ``'kid'`` selects a short list of per-kid maxima; any
                other value selects the full list built below.
            delta: when equal to ``'all'`` the ``date_of_birth`` day
                aggregate is appended to the full list.

        Returns:
            A list of ``Count``/``Aggregate``/``Fraction`` definitions.
        """
        if index == 'kid':
            kid_level_columns = (
                ['test_address_count', 'address_count', 'test_count'],
                ['max_bll'],
            )
            return [
                Aggregate(columns, 'max', fname=False)
                for columns in kid_level_columns
            ]

        # 'kid' counts every row; 'kid_sample_2y' counts rows whose last
        # sample was taken more than 365 * 2 days after birth or whose
        # max_bll is at least 6.
        counts = Count([
            np.float32(1),
            lambda k: ((k.last_sample_date - k.date_of_birth) / day >
                       365 * 2) | (k.max_bll >= 6),
        ], ['kid', 'kid_sample_2y'])

        activity = [
            Aggregate(['test_address_count', 'test_count', 'address_count'],
                      ['median', 'mean', 'min', 'max']),
            Count([
                lambda k: k.address_test_min_date.notnull(),
                lambda k: k.first_sample_date.notnull(),
            ],
                  prop=True,
                  name=['tested_here', 'tested_ever']),
            Aggregate([
                days('address_min_date', 'address_max_date'),
                days('address_test_min_date', 'address_test_max_date'),
            ], ['mean'], ['address_total_time', 'address_test_time']),
        ]

        # The first of these are kid level, not address-kid level. That
        # means kids get double counted when aggregated above the address
        # level if they lived at multiple addresses in, e.g., the same
        # census tract.
        bll_levels = Aggregate([
            'max_bll', 'avg_bll', 'cumulative_bll', 'avg_cumulative_bll',
            'mean_bll', 'address_max_bll', 'address_mean_bll'
        ], ['mean', 'median', 'min', 'max'])

        def per_kid(numerator):
            # Every poisoning count is reported relative to ``counts``.
            return Fraction(numerator, counts, include_numerator=True)

        # ebll ever, future, past and present count the number of kids who
        # moved into this address in the period defined by date and delta
        # and who were poisoned ever, after, before or during their time
        # living there.
        poisoning = [
            per_kid(Count([
                lambda k: k.first_bll6_sample_date.notnull(),
                lambda k: k.first_bll10_sample_date.notnull(),
            ], ['bll6_ever', 'bll10_ever'])),
            per_kid(Count([
                lambda k: k.first_bll6_sample_date > k.address_max_date,
                lambda k: k.first_bll10_sample_date > k.address_max_date,
            ], ['bll6_future', 'bll10_future'])),
            per_kid(Count([
                lambda k: k.first_bll6_sample_date < k.address_min_date,
                lambda k: k.first_bll10_sample_date < k.address_min_date,
            ], ['bll6_past', 'bll10_past'])),
            per_kid(Count([
                lambda k: k.first_bll6_sample_date.between(
                    k.address_min_date, k.address_max_date),
                lambda k: k.first_bll10_sample_date.between(
                    k.address_min_date, k.address_max_date),
            ], ['bll6_present', 'bll10_present'])),
        ]

        surnames = Aggregate('last_name', 'nunique', fname='count', astype=str)
        # TODO: min_last_sample_age cutoffs

        aggregates = [counts] + activity + [bll_levels] + poisoning
        aggregates.append(surnames)

        if delta == 'all':
            aggregates.append(
                Aggregate(days('date_of_birth', date), ['min', 'max', 'mean'],
                          'date_of_birth'))

        return aggregates
Ejemplo n.º 7
0
    def get_aggregates(self, date, index, delta):
        """Return the aggregate definitions for the requested index level.

        Args:
            date: reference date passed to ``days`` for the WIC and
                ``date_of_birth`` day aggregates.
            index: ``'kid'`` selects a short list of per-kid aggregates; any
                other value selects the full list built below.
            delta: when equal to ``'all'`` the ``days_since_wic`` and
                ``date_of_birth`` day aggregates are appended.

        Returns:
            A list of ``Count``/``Aggregate``/``Fraction`` definitions.
        """
        if index == 'kid':
            return [
                Aggregate(['address_count', 'test_count'], 'max', fname=False),
                Aggregate(['max_bll'], 'max', fname=False),
                Aggregate(lambda k: k.last_wic_date == k.address_wic_max_date,
                          'any',
                          'last_wic_address',
                          fname=False),
                Aggregate(['address_wic_mother', 'address_wic_infant'],
                          'any',
                          fname=False),
                # Columns and names are positional pairs, so both lists are
                # kept in the same order (min, max, last, first). Otherwise
                # the min/max outputs would carry each other's label.
                Aggregate([
                    days('address_wic_min_date', date),
                    days('address_wic_max_date', date),
                    days('last_wic_date', date),
                    days('first_wic_date', date),
                ], ['max'], [
                    'address_wic_min_date', 'address_wic_max_date',
                    'last_wic_date', 'first_wic_date'
                ],
                          fname=False),
            ]

        # 'kid' counts every row; 'kid_sample_2y' counts rows whose last
        # sample was taken more than 365 * 2 days after birth or whose
        # max_bll is at least 6.
        counts = Count([
            np.float32(1),
            lambda k: ((k.last_sample_date - k.date_of_birth) / day >
                       365 * 2) | (k.max_bll >= 6),
        ], ['kid', 'kid_sample_2y'])

        aggregates = [
            counts,
            Aggregate(['address_count', 'test_count'],
                      ['median', 'mean', 'min', 'max']),
            Count([
                lambda k: k.address_test_min_date.notnull(),
                lambda k: k.first_sample_date.notnull(),
                lambda k: k.first_wic_date.notnull(),
            ],
                  prop=True,
                  name=['tested_here', 'tested_ever', 'wic']),
            # Testing counts among WIC rows, built with the WIC indicator as
            # the ``parent`` argument.
            Count([
                lambda k: k.address_wic_min_date.notnull()
                & k.address_test_min_date.notnull(),
                lambda k: k.address_wic_min_date.notnull()
                & k.first_sample_date.notnull(),
            ],
                  name=['wic_tested_here', 'wic_tested_ever'],
                  parent=lambda k: k.first_wic_date.notnull()),
            Aggregate([
                days('address_min_date', 'address_max_date'),
                days('address_wic_min_date', 'address_wic_max_date'),
                days('address_test_min_date', 'address_test_max_date'),
            ], ['mean'], [
                'address_total_time', 'address_wic_time', 'address_test_time'
            ]),
            Aggregate(
                ['max_bll', 'mean_bll', 'address_max_bll', 'address_mean_bll'],
                ['mean', 'median', 'min', 'max']),
            # First elevated sample ever / after / before / during the time
            # at this address, each relative to ``counts``.
            Fraction(Count([
                lambda k: k.first_bll6_sample_date.notnull(),
                lambda k: k.first_bll10_sample_date.notnull(),
            ], ['bll6_ever', 'bll10_ever']),
                     counts,
                     include_numerator=True),
            Fraction(Count([
                lambda k: k.first_bll6_sample_date > k.address_max_date,
                lambda k: k.first_bll10_sample_date > k.address_max_date,
            ], ['bll6_future', 'bll10_future']),
                     counts,
                     include_numerator=True),
            Fraction(Count([
                lambda k: k.first_bll6_sample_date < k.address_min_date,
                lambda k: k.first_bll10_sample_date < k.address_min_date,
            ], ['bll6_past', 'bll10_past']),
                     counts,
                     include_numerator=True),
            Fraction(Count([
                lambda k: k.first_bll6_sample_date.between(
                    k.address_min_date, k.address_max_date),
                lambda k: k.first_bll10_sample_date.between(
                    k.address_min_date, k.address_max_date),
            ], ['bll6_present', 'bll10_present']),
                     counts,
                     include_numerator=True),
            Aggregate('last_name', 'nunique', fname='count', astype=str),
            # TODO: min_last_sample_age cutoffs
        ]

        if delta == 'all':
            aggregates.extend([
                Aggregate(days('address_wic_min_date', date), ['min', 'max'],
                          'days_since_wic'),
                Aggregate(days('date_of_birth', date), ['min', 'max', 'mean'],
                          'date_of_birth'),
            ])

        return aggregates
Ejemplo n.º 8
0
    def get_aggregates(self, date, index, delta):
        """Return the aggregate definitions for the requested index level.

        Args:
            date: reference date passed to ``days`` for the WIC and
                ``date_of_birth`` day aggregates.
            index: ``'kid'`` selects a short list of per-kid aggregates; any
                other value selects the full list built below.
            delta: when equal to ``'all'`` the ``days_since_wic`` and
                ``date_of_birth`` day aggregates are appended.

        Returns:
            A list of ``Count``/``Aggregate``/``Fraction`` definitions.
        """
        if index == 'kid':
            # The day columns and their output names are listed in the same
            # order (min, max, last, first) so each output is labelled with
            # the date column it was computed from.
            wic_day_columns = [
                days('address_wic_min_date', date),
                days('address_wic_max_date', date),
                days('last_wic_date', date),
                days('first_wic_date', date),
            ]
            wic_day_names = [
                'address_wic_min_date', 'address_wic_max_date',
                'last_wic_date', 'first_wic_date',
            ]
            return [
                Aggregate(['address_count', 'test_count'],
                          'max', fname=False),
                Aggregate(['max_bll'], 'max', fname=False),
                Aggregate(lambda k: k.last_wic_date == k.address_wic_max_date,
                          'any', 'last_wic_address', fname=False),
                Aggregate(['address_wic_mother', 'address_wic_infant'],
                          'any', fname=False),
                Aggregate(wic_day_columns, ['max'], wic_day_names,
                          fname=False),
            ]

        # 'kid' counts every row; 'kid_sample_2y' counts rows whose last
        # sample was taken more than 365*2 days after birth or whose max_bll
        # is at least 6.
        counts = Count(
            [np.float32(1),
             lambda k: ((k.last_sample_date - k.date_of_birth)/day > 365*2)
             | (k.max_bll >= 6)],
            ['kid', 'kid_sample_2y'])

        aggregates = [
            counts,
            Aggregate(['address_count', 'test_count'],
                      ['median', 'mean', 'min', 'max']),

            Count([lambda k: k.address_test_min_date.notnull(),
                   lambda k: k.first_sample_date.notnull(),
                   lambda k: k.first_wic_date.notnull()],
                  prop=True,
                  name=['tested_here', 'tested_ever', 'wic']),

            # Testing counts among WIC rows, built with the WIC indicator as
            # the ``parent`` argument.
            Count([lambda k: k.address_wic_min_date.notnull()
                   & k.address_test_min_date.notnull(),
                   lambda k: k.address_wic_min_date.notnull()
                   & k.first_sample_date.notnull()],
                  name=['wic_tested_here', 'wic_tested_ever'],
                  parent=lambda k: k.first_wic_date.notnull()),

            Aggregate([days('address_min_date', 'address_max_date'),
                       days('address_wic_min_date', 'address_wic_max_date'),
                       days('address_test_min_date', 'address_test_max_date')],
                      ['mean'],
                      ['address_total_time', 'address_wic_time',
                       'address_test_time']),

            Aggregate(['max_bll', 'mean_bll', 'address_max_bll',
                       'address_mean_bll'],
                      ['mean', 'median', 'min', 'max']),

            # First elevated sample ever / after / before / during the time
            # at this address, each relative to ``counts``.
            Fraction(Count([lambda k: k.first_bll6_sample_date.notnull(),
                            lambda k: k.first_bll10_sample_date.notnull()],
                           ['bll6_ever', 'bll10_ever']),
                     counts, include_numerator=True),
            Fraction(Count([lambda k: k.first_bll6_sample_date > k.address_max_date,
                            lambda k: k.first_bll10_sample_date > k.address_max_date],
                           ['bll6_future', 'bll10_future']),
                     counts, include_numerator=True),
            Fraction(Count([lambda k: k.first_bll6_sample_date < k.address_min_date,
                            lambda k: k.first_bll10_sample_date < k.address_min_date],
                           ['bll6_past', 'bll10_past']),
                     counts, include_numerator=True),
            Fraction(Count([lambda k: k.first_bll6_sample_date.between(
                                k.address_min_date, k.address_max_date),
                            lambda k: k.first_bll10_sample_date.between(
                                k.address_min_date, k.address_max_date)],
                           ['bll6_present', 'bll10_present']),
                     counts, include_numerator=True),
            Aggregate('last_name', 'nunique', fname='count', astype=str),
            # TODO: min_last_sample_age cutoffs
        ]

        if delta == 'all':
            aggregates.extend([
                Aggregate(days('address_wic_min_date', date),
                          ['min', 'max'], 'days_since_wic'),
                Aggregate(days('date_of_birth', date),
                          ['min', 'max', 'mean'], 'date_of_birth'),
            ])

        return aggregates