Esempio n. 1
0
    def get_aggregates(self, date, data):
        """Build one ``Count`` per permit type, followed by a bare ``Count()``.

        Args:
            date: not read by this method.
            data: not read by this method.

        Returns:
            list: ``Count('permit_type_<p>', prop=True)`` for every ``p`` in
            ``PERMIT_TYPES`` (in iteration order), then ``Count()`` last.
        """
        per_type = []
        for permit in PERMIT_TYPES:
            per_type.append(Count('permit_type_%s' % permit, prop=True))

        # The unconditional count is appended after the per-type counts.
        return per_type + [Count()]
Esempio n. 2
0
    def aggregates(self):
        """Return the three count aggregates declared for this step.

        Returns:
            list: a bare ``Count()``, a ``Count`` over ``'Arrest'``, and a
            ``Count`` named ``'theft'`` (``prop=True``) whose definition
            tests ``'Primary Type' == 'THEFT'``.
        """
        total = Count()
        arrests = Count('Arrest')
        thefts = Count(lambda crime: crime['Primary Type'] == 'THEFT',
                       'theft', prop=True)
        return [total, arrests, thefts]
Esempio n. 3
0
    def get_aggregates(self, date, delta):
        """Assemble the inspection aggregates plus one count per closure code.

        Args:
            date: reference value; the two elapsed-time aggregates subtract
                each row's ``init_date`` / ``comply_date`` from it and divide
                by the externally defined ``day``.
            delta: not read by this method.

        Returns:
            list: the fixed aggregates in declaration order, followed by a
            ``closure_<code>`` count for every entry of ``CLOSURE_CODES``.
        """
        status = [
            Count(),
            Aggregate('inspected', 'max', fname=False),
            Aggregate('complied', 'max', fname=False),
        ]

        tallies = [
            Count('hazard_int', prop=True),
            Count('hazard_ext', prop=True),
            Count('hazard', prop=True),
            Count('hazard_both', prop=True),
            Count('inspected'),
            Count('complied', prop=True),
        ]

        durations = [
            Aggregate('inspection_to_compliance', ['mean', 'min', 'max']),
            Aggregate(lambda row: (date - row.init_date) / day,
                      ['mean', 'min', 'max'],
                      name='from_inspection'),
            Aggregate(lambda row: (date - row.comply_date) / day,
                      ['mean', 'min', 'max'],
                      name='from_compliance'),
        ]

        closures = []
        for code in CLOSURE_CODES:
            # Bind the code as a default argument so every lambda keeps the
            # value of its own iteration rather than the last one.
            closures.append(
                Count(lambda i, c=code: i['closure'] == c,
                      name='closure_%s' % code,
                      prop=True))

        return status + tallies + durations + closures
Esempio n. 4
0
    def get_aggregates(self, date, delta):
        """Build the blood-lead-level test aggregates for one (date, delta).

        Args:
            date: reference value used to compute ``start_date`` and the
                "days since" aggregates.
            delta: when equal to ``'all'`` the "days since" aggregates are
                added; otherwise it is passed to ``data.parse_delta`` and the
                two ``no_bll*_count`` aggregates and their ratios are added.
                ``data`` is resolved from the enclosing scope, it is not a
                parameter of this method.

        Returns:
            list: the common aggregates followed by the delta-specific ones.
        """
        kid_count = Aggregate('kid_id',
                              'nunique',
                              name='kid_count',
                              fname=False)

        result = [Count()]
        result.append(
            Aggregate('bll', ['mean', 'median', 'max', 'min', 'std']))
        result.append(
            Aggregate(lambda t: t.bll.where(t.increase),
                      ['mean', 'median', 'max', 'min', 'std'], 'increase_bll'))
        result.append(Count(lambda t: t.bll <= 2, 'bll2', prop=True))
        # prevalences
        result.append(
            Fraction(Count(['first_bll6', 'first_bll10']),
                     kid_count,
                     include_numerator=True,
                     include_denominator=True))

        if delta == 'all':
            result.append(
                Aggregate(days('date', date), ['min', 'max'],
                          'days_since_test'))
            since_threshold = [
                lambda t: (date - t.date.where(t.bll >= 6)) / day,
                lambda t: (date - t.date.where(t.bll >= 10)) / day,
            ]
            result.append(
                Aggregate(since_threshold, ['min', 'max'],
                          ['days_since_bll6', 'days_since_bll10']))
            return result

        # incidences
        start_date = date - data.parse_delta(delta)
        no_bll6_count = Aggregate(
            lambda k: k.kid_id.where(
                (k.first_bll6_sample_date >= start_date).fillna(True)),
            'nunique',
            name='no_bll6_count',
            fname=False)
        no_bll10_count = Aggregate(
            lambda k: k.kid_id.where(
                (k.first_bll10_sample_date >= start_date).fillna(True)),
            'nunique',
            name='no_bll10_count',
            fname=False)

        result += [
            no_bll6_count,
            no_bll10_count,
            Count('first_bll6') / no_bll6_count,
            Count('first_bll10') / no_bll10_count,
        ]
        return result
Esempio n. 5
0
    def get_aggregates(self, date, data):
        """Build counts over keywords, statuses and every keyword/status pair.

        Args:
            date: not read by this method.
            data: not read by this method.

        Returns:
            list: ``Count()``, a ``Count`` over ``KEYWORDS``, a ``Count`` over
            ``STATUS[1]``, and a ``Count`` whose definitions AND together one
            keyword column and one status column for each pair in
            ``product(KEYWORDS, STATUS[1])``, named ``'<keyword>_<status>'``.
        """
        pairs = list(product(KEYWORDS, STATUS[1]))

        # Default arguments pin each lambda to the pair of its own iteration.
        joint_tests = [lambda v, k=k, s=s: v[k] & v[s] for k, s in pairs]
        joint_names = ['%s_%s' % pair for pair in pairs]

        return [
            Count(),
            Count(KEYWORDS, prop=True),
            Count(STATUS[1], prop=True),
            Count(joint_tests, prop=True, name=joint_names),
        ]
Esempio n. 6
0
    def get_aggregates(self, date, delta):
        """Build the prenatal-visit aggregates.

        The result of ``self.inputs[0]`` is fetched so that the columns
        matching ``'service_.*'`` can be selected and summed.

        Args:
            date: not read by this method.
            delta: not read by this method.

        Returns:
            list: count and visit aggregates, the summed service columns,
            then the pregnancy-history, smoking and drinking aggregates.
        """
        prenatal = self.inputs[0].get_result()

        visits = [
            Count(),
            Aggregate(days('visit_d', 'date_of_birth'), ['min', 'max'],
                      'visit'),
        ]

        service_columns = list(select_regexes(prenatal, ['service_.*']))
        services = [Aggregate(service_columns, 'sum', fname=False)]

        history = [
            Aggregate('preg_nbr_n', 'max', 'previous_pregnancies',
                      fname=False),
            Aggregate('lv_brth_n', 'max', 'previous_births', fname=False),
            Aggregate('othr_trm_n', 'max', 'previous_terminations',
                      fname=False),
        ]

        smoking = [
            Aggregate(lambda p: p.smk3_mth_f == 'Y', 'any', 'smoked_3mo',
                      fname=False),
            Aggregate('cig3_day_n', 'max', 'cigarettes_per_day', fname=False),
        ]

        drinking = [
            Aggregate(lambda p: p.drk3_mth_f == 'Y', 'any', 'drank_3mo',
                      fname=False),
            Aggregate('dr_dy_wk_n', 'max', 'days_drank_per_week', fname=False),
            Aggregate('drnk_day_n', 'max', 'drinks_per_day', fname=False),
        ]

        return visits + services + history + smoking + drinking
Esempio n. 7
0
    def aggregates(self):
        """Return the assessment aggregates.

        Returns:
            list: general value/age aggregates, a residential and a
            non-residential ``Fraction`` (summed ``total_value / 100000`` over
            summed ``units``, each emitting numerator and denominator),
            size and per-unit means, and ``Proportion`` specs for owner
            occupancy and for every entry of ``CLASSES``.
        """
        general = [
            Count(),
            # NOTE(review): 'assessents' looks like a misspelling of
            # 'assessments'; kept verbatim because it is an output name.
            Aggregate('count', 'mean', 'assessents'),
            Aggregate(lambda a: a.land_value / 100000, 'mean',
                      name='land_value'),
            Aggregate(['min_age', 'max_age'], ['min', 'mean', 'max']),
        ]

        # residential total value and average value
        residential = Fraction(
            Aggregate(
                lambda a: a.total_value.where(a.residential > 0) / 100000,
                'sum', 'residential_total_value', fname=False),
            Aggregate(
                lambda a: a.units.where(a.residential > 0),
                'sum', name='residential_units', fname=False),
            include_numerator=True, include_denominator=True)

        # non-residential total and average value
        non_residential = Fraction(
            Aggregate(
                lambda a: a.total_value.where(a.residential == 0) / 100000,
                'sum', 'non_residential_total_value', fname=False),
            Aggregate(
                lambda a: a.units.where(a.residential == 0),
                'sum', name='non_residential_units', fname=False),
            include_numerator=True, include_denominator=True)

        sizes = [
            Aggregate('apartments', 'mean'),
            Aggregate('units', 'mean'),
            Aggregate(lambda a: a.rooms / a.units, 'mean',
                      name='rooms_per_unit'),
            Aggregate(lambda a: a.beds / a.units, 'mean',
                      name='beds_per_unit'),
            Aggregate(lambda a: a.baths / a.units, 'mean',
                      name='baths_per_unit'),
        ]

        proportions = [
            Proportion(lambda a: a.owner_occupied > 0, name='owner_occupied'),
            # Default argument pins each lambda to its own class column.
            Proportion([lambda a, c=c: a[c] > 0 for c in CLASSES],
                       name=CLASSES),
        ]

        return general + [residential, non_residential] + sizes + proportions
Esempio n. 8
0
 def aggregates(self):
     """Return the building aggregates.

     Returns:
         list: count, area/volume aggregates, nan-aware statistics over the
         concatenated ``years_built`` values, address/unit sums, and the
         condition and pre-1978 ``Proportion`` specs.
     """
     return [
         Count(),
         Aggregate('area', 'sum'),
         Aggregate(lambda b: b.area * b.stories, 'mean', 'volume'),
         # Each cell of years_built is concatenated with the others before
         # the nan-aware statistic is taken.
         Aggregate('years_built', [
             lambda y: np.nanmedian(np.concatenate(y.values)),
             lambda y: np.nanmean(np.concatenate(y.values)),
             lambda y: np.nanmin(np.concatenate(y.values)),
             lambda y: np.nanmax(np.concatenate(y.values)),
         ],
                   fname=['median', 'mean', 'min', 'max']),
         Aggregate('address_count', 'sum'),
         # average proportion of sound building condition
         Proportion(['%s_prop' % c for c in CONDITIONS],
                    'condition_not_null',
                    name=CONDITIONS),
         # Bind c as a default argument: without it every lambda looks up c
         # when it is eventually called and therefore tests only the column
         # of the last entry in CONDITIONS.
         Aggregate([lambda p, c=c: p['%s_prop' % c] > 0 for c in CONDITIONS],
                   'any',
                   name=CONDITIONS),
         Aggregate('stories', 'mean'),
         Aggregate('units', 'sum'),
         Proportion('pre1978_prop',
                    lambda i: i.pre1978_prop.notnull(),
                    denom_name='pre1978_not_null'),
     ]
Esempio n. 9
0
    def get_aggregates(self, date, delta):
        """Build the prenatal-visit aggregates, including service/clinic sets.

        Args:
            date: not read by this method.
            delta: not read by this method.

        Returns:
            list: ``Count()``, the visit min/max aggregate, then one
            ``Aggregate(..., fname=False)`` per row of the table below, in
            table order.
        """
        leading = [
            Count(),
            Aggregate(days('visit_d', 'date_of_birth'), ['min', 'max'],
                      'visit'),
        ]

        # (definition, aggregation function, output name); every row is
        # turned into Aggregate(definition, function, name, fname=False).
        table = [
            ('serv_typ_c', lambda s: set(s), 'service'),
            ('preg_nbr_n', 'max', 'previous_pregnancies'),
            ('lv_brth_n', 'max', 'previous_births'),
            ('othr_trm_n', 'max', 'previous_terminations'),
            (lambda p: p.smk3_mth_f == 'Y', 'any', 'smoked_3mo'),
            ('cig3_day_n', 'max', 'cigarettes_per_day'),
            (lambda p: p.drk3_mth_f == 'Y', 'any', 'drank_3mo'),
            ('dr_dy_wk_n', 'max', 'days_drank_per_week'),
            ('drnk_day_n', 'max', 'drinks_per_day'),
            ('clinicid_i', lambda c: set(c), 'clinic'),
        ]

        return leading + [
            Aggregate(definition, function, label, fname=False)
            for definition, function, label in table
        ]
Esempio n. 10
0
    def get_aggregates(self, date, delta):
        """Build the blood-lead-level test aggregates.

        Args:
            date: reference value for the "days since" aggregates.
            delta: the "days since" aggregates are only added when this
                equals ``'all'``.

        Returns:
            list: count, ``bll`` statistics, the ``bll2`` count and the
            first-bll6/bll10 ``Fraction`` over ``kid_count``; plus the two
            "days since" aggregates when ``delta == 'all'``.
        """
        unique_kids = Aggregate('kid_id', 'nunique',
                                name='kid_count', fname=False)

        common = [
            Count(),
            Aggregate('bll', ['mean', 'median', 'max', 'min', 'std']),
            Count(lambda t: t.bll <= 2, 'bll2', prop=True),
            Fraction(Count(['first_bll6', 'first_bll10']), unique_kids,
                     include_numerator=True, include_denominator=True),
        ]
        if delta != 'all':
            return common

        since_test = Aggregate(days('date', date), ['min', 'max'],
                               'days_since_test')
        since_threshold = Aggregate(
            [
                lambda t: (date - t.date.where(t.bll >= 6)) / day,
                lambda t: (date - t.date.where(t.bll >= 10)) / day,
            ],
            ['min', 'max'],
            ['days_since_bll6', 'days_since_bll10'])
        return common + [since_test, since_threshold]
Esempio n. 11
0
    def get_aggregates(self, date, index, delta):
        """Return the aggregate specifications for the given index level.

        For ``index == 'kid'`` a short list of ``max`` aggregates is returned
        immediately. For any other index the longer list built below is
        returned, extended with date-of-birth aggregates when
        ``delta == 'all'``.

        Args:
            date: only read in the ``delta == 'all'`` branch, where it is
                passed to ``days('date_of_birth', date)``.
            index: compared against ``'kid'`` to select the short list.
            delta: compared against ``'all'`` to add the extra aggregates.

        Returns:
            list: the aggregate specifications, in declaration order.
        """
        if index == 'kid':
            return [
                Aggregate(
                    ['test_address_count', 'address_count', 'test_count'],
                    'max',
                    fname=False),
                Aggregate(['max_bll'], 'max', fname=False),
                # Comment out this and all other wic aggregates because they can't be lagged
                # and they're not useful for predicting poisoning
                #Aggregate(lambda k: k.last_wic_date == k.address_wic_max_date,
                #        'any', 'last_wic_address', fname=False),
                #Aggregate(['address_wic_mother', 'address_wic_infant'], 'any', fname=False),
                #Aggregate([days('address_wic_max_date', date),
                #        days('address_wic_min_date', date),
                #        days('last_wic_date', date),
                #        days('first_wic_date', date)],
                #        ['max'], ['address_wic_min_date', 'address_wic_max_date',
                #                  'last_wic_date', 'first_wic_date'], fname=False)
            ]

        # True where the last sample was taken more than 365 * 2 `day` units
        # after date_of_birth, or where max_bll >= 6.
        sample_2y = lambda k: ((k.last_sample_date - k.date_of_birth) / day >
                               365 * 2) | (k.max_bll >= 6)
        # Two counts named 'kid' and 'kid_sample_2y'; reused as the second
        # argument (presumably the denominator) of every Fraction below.
        counts = Count([np.float32(1), sample_2y], ['kid', 'kid_sample_2y'])

        aggregates = [
            counts,
            Aggregate(['test_address_count', 'test_count', 'address_count'],
                      ['median', 'mean', 'min', 'max']),
            Count([
                lambda k: k.address_test_min_date.notnull(),
                lambda k: k.first_sample_date.notnull()
            ],
                  prop=True,
                  name=['tested_here', 'tested_ever']),

            #Count(lambda k: k.first_wic_date.notnull(), prop=True, name='wic'),

            #Count([lambda k: k.address_wic_min_date.notnull() & k.address_test_min_date.notnull(),
            #       lambda k: k.address_wic_min_date.notnull() & k.first_sample_date.notnull()],
            #       name=['wic_tested_here', 'wic_tested_ever'],
            #       prop=lambda k: k.first_wic_date.notnull(), prop_name='wic'),
            Aggregate(
                [
                    days('address_min_date', 'address_max_date'),
                    #days('address_wic_min_date', 'address_wic_max_date'),
                    days('address_test_min_date', 'address_test_max_date')
                ],
                ['mean'],
                [
                    'address_total_time',  #'address_wic_time',
                    'address_test_time'
                ]),

            # the first of these are kid level, not address-kid level
            # that means kids get double counted when aggregated to above the address level
            # if they lived in multiple addresses on that e.g. census tract. oh well.
            Aggregate([
                'max_bll', 'avg_bll', 'cumulative_bll', 'avg_cumulative_bll',
                'mean_bll', 'address_max_bll', 'address_mean_bll'
            ], ['mean', 'median', 'min', 'max']),

            # ebll past, present, future, ever count the number of kids who
            # moved into this address in the period defined by date and delta
            # and who were poisoned before, during, after or ever relative to their time living there
            Fraction(Count([
                lambda k: k.first_bll6_sample_date.notnull(),
                lambda k: k.first_bll10_sample_date.notnull()
            ], ['bll6_ever', 'bll10_ever']),
                     counts,
                     include_numerator=True),
            Fraction(Count([
                lambda k: k.first_bll6_sample_date > k.address_max_date,
                lambda k: k.first_bll10_sample_date > k.address_max_date
            ], ['bll6_future', 'bll10_future']),
                     counts,
                     include_numerator=True),
            Fraction(Count([
                lambda k: k.first_bll6_sample_date < k.address_min_date,
                lambda k: k.first_bll10_sample_date < k.address_min_date
            ], ['bll6_past', 'bll10_past']),
                     counts,
                     include_numerator=True),
            Fraction(Count([
                lambda k: k.first_bll6_sample_date.between(
                    k.address_min_date, k.address_max_date),
                lambda k: k.first_bll10_sample_date.between(
                    k.address_min_date, k.address_max_date)
            ], ['bll6_present', 'bll10_present']),
                     counts,
                     include_numerator=True),
            Aggregate('last_name', 'nunique', fname='count', astype=str)
            # TODO: min_last_sample_age cutoffs
        ]
        # Extra aggregates that are only added for the 'all' delta.
        if delta == 'all':
            aggregates.extend([
                #Aggregate(days('address_wic_min_date', date), ['min', 'max'], 'days_since_wic'),
                Aggregate(days('date_of_birth', date), ['min', 'max', 'mean'],
                          'date_of_birth'),
            ])

        return aggregates
Esempio n. 12
0
 def get_aggregates(self, date, delta):
     """Build a total count plus counts over the event-code columns.

     Args:
         date: not read by this method.
         delta: not read by this method.

     Returns:
         list: ``Count()``, a ``Count`` over ``'event_code_<e>'`` for every
         ``e`` in ``event_codes``, and a ``Count`` over
         ``'event_res_code_<e>'`` for every ``e`` in ``event_res_codes``
         (both with ``prop=True``).
     """
     aggregates = [Count()]

     event_columns = ['event_code_' + code for code in event_codes]
     aggregates.append(Count(event_columns, prop=True))

     event_res_columns = ['event_res_code_' + code for code in event_res_codes]
     aggregates.append(Count(event_res_columns, prop=True))

     return aggregates
Esempio n. 13
0
    def get_aggregates(self, date, index, delta):
        """Return the aggregate specifications for the given index level.

        For ``index == 'kid'`` a short list of kid-level aggregates
        (including the wic ones) is returned immediately. For any other
        index the longer list built below is returned, extended with two
        "days" aggregates when ``delta == 'all'``.

        Args:
            date: passed as the second argument to several ``days(...)``
                calls.
            index: compared against ``'kid'`` to select the short list.
            delta: compared against ``'all'`` to add the extra aggregates.

        Returns:
            list: the aggregate specifications, in declaration order.
        """
        if index == 'kid':
            return [
                Aggregate(['address_count', 'test_count'], 'max', fname=False),
                Aggregate(['max_bll'], 'max', fname=False),
                Aggregate(lambda k: k.last_wic_date == k.address_wic_max_date,
                          'any',
                          'last_wic_address',
                          fname=False),
                Aggregate(['address_wic_mother', 'address_wic_infant'],
                          'any',
                          fname=False),
                # NOTE(review): the definitions list address_wic_max_date
                # before address_wic_min_date, while the names list
                # 'address_wic_min_date' before 'address_wic_max_date' --
                # confirm this pairing is intended.
                Aggregate([
                    days('address_wic_max_date', date),
                    days('address_wic_min_date', date),
                    days('last_wic_date', date),
                    days('first_wic_date', date)
                ], ['max'], [
                    'address_wic_min_date', 'address_wic_max_date',
                    'last_wic_date', 'first_wic_date'
                ],
                          fname=False)
            ]

        # True where the last sample was taken more than 365 * 2 `day` units
        # after date_of_birth, or where max_bll >= 6.
        sample_2y = lambda k: ((k.last_sample_date - k.date_of_birth) / day >
                               365 * 2) | (k.max_bll >= 6)
        # Two counts named 'kid' and 'kid_sample_2y'; reused as the second
        # argument (presumably the denominator) of every Fraction below.
        counts = Count([np.float32(1), sample_2y], ['kid', 'kid_sample_2y'])

        aggregates = [
            counts,
            Aggregate(['address_count', 'test_count'],
                      ['median', 'mean', 'min', 'max']),
            Count([
                lambda k: k.address_test_min_date.notnull(),
                lambda k: k.first_sample_date.notnull(),
                lambda k: k.first_wic_date.notnull()
            ],
                  prop=True,
                  name=['tested_here', 'tested_ever', 'wic']),
            # Two definitions: wic-at-address AND tested-at-address, and
            # wic-at-address AND ever-sampled; `parent` is a non-null test on
            # first_wic_date.
            Count([
                lambda k: k.address_wic_min_date.notnull(
                ) & k.address_test_min_date.notnull(), lambda k: k.
                address_wic_min_date.notnull() & k.first_sample_date.notnull()
            ],
                  name=['wic_tested_here', 'wic_tested_ever'],
                  parent=lambda k: k.first_wic_date.notnull()),
            Aggregate([
                days('address_min_date', 'address_max_date'),
                days('address_wic_min_date', 'address_wic_max_date'),
                days('address_test_min_date', 'address_test_max_date')
            ], ['mean'], [
                'address_total_time', 'address_wic_time', 'address_test_time'
            ]),
            Aggregate(
                ['max_bll', 'mean_bll', 'address_max_bll', 'address_mean_bll'],
                ['mean', 'median', 'min', 'max']),
            # The four Fractions below compare the first bll6 / bll10 sample
            # date with the address min/max dates: ever (not null), future
            # (> max), past (< min) and present (between min and max).
            Fraction(Count([
                lambda k: k.first_bll6_sample_date.notnull(),
                lambda k: k.first_bll10_sample_date.notnull()
            ], ['bll6_ever', 'bll10_ever']),
                     counts,
                     include_numerator=True),
            Fraction(Count([
                lambda k: k.first_bll6_sample_date > k.address_max_date,
                lambda k: k.first_bll10_sample_date > k.address_max_date
            ], ['bll6_future', 'bll10_future']),
                     counts,
                     include_numerator=True),
            Fraction(Count([
                lambda k: k.first_bll6_sample_date < k.address_min_date,
                lambda k: k.first_bll10_sample_date < k.address_min_date
            ], ['bll6_past', 'bll10_past']),
                     counts,
                     include_numerator=True),
            Fraction(Count([
                lambda k: k.first_bll6_sample_date.between(
                    k.address_min_date, k.address_max_date),
                lambda k: k.first_bll10_sample_date.between(
                    k.address_min_date, k.address_max_date)
            ], ['bll6_present', 'bll10_present']),
                     counts,
                     include_numerator=True),
            Aggregate('last_name', 'nunique', fname='count', astype=str)
            # TODO: min_last_sample_age cutoffs
        ]
        # Extra aggregates that are only added for the 'all' delta.
        if delta == 'all':
            aggregates.extend([
                Aggregate(days('address_wic_min_date', date), ['min', 'max'],
                          'days_since_wic'),
                Aggregate(days('date_of_birth', date), ['min', 'max', 'mean'],
                          'date_of_birth'),
            ])

        return aggregates
Esempio n. 14
0
 def get_aggregates(self, date, delta):
     """Return the three count aggregates declared for this step.

     Args:
         date: not read by this method.
         delta: not read by this method.

     Returns:
         list: a bare ``Count()``, a ``Count`` over ``'Arrest'``, and a
         ``Count`` named ``'theft'`` (``prop=True``) whose definition tests
         ``'Primary Type' == 'THEFT'``.
     """
     aggregates = [Count()]
     aggregates.append(Count('Arrest'))
     aggregates.append(
         Count(lambda crime: crime['Primary Type'] == 'THEFT',
               'theft',
               prop=True))
     return aggregates
Esempio n. 15
0
# Aggregate specifications; the lambdas read building attributes such as
# year_built, t_add1/f_add1 and bldg_condi from their argument.
aggregates = [
    Aggregate('area', 'mean', fname=False),
    # Collect every year_built value into a list named 'years_built'.
    Aggregate('year_built', lambda l: list(l), name='years_built',
              fname=False),
    # (t_add1 - f_add1) / 2 + 1, maximised, named 'address_count'.
    # NOTE(review): presumably counts same-side street numbers in an address
    # range -- confirm.
    Aggregate(lambda b: (b.t_add1 - b.f_add1) / 2 + 1,
              'max',
              name='address_count',
              fname=False),
    Aggregate('bldg_condi_not_null',
              'any',
              name='condition_not_null',
              fname=False),
    Aggregate('stories', 'mean', fname=False),
    Aggregate('units', 'mean', fname=False),
    # Count of year_built < 1978 over count of non-null year_built.
    Fraction(Count(lambda b: b.year_built < 1978),
             Count(lambda b: b.year_built.notnull()),
             name='pre1978_prop'),
    # `cond` is defined outside this excerpt; the name is a template using
    # the {numerator} placeholder.
    Fraction(cond,
             Count(lambda b: b.bldg_condi.notnull()),
             name='{numerator}_prop')
]

# Database engine obtained from the project's util helper.
engine = util.create_engine()
# read tables from db
building_components = pd.read_sql(
    'select * from buildings.building_components', engine)

buildings = pd.read_sql(
    """
select ogc_fid id,