# NOTE(review): this chunk was whitespace-mangled onto a single physical line and
# begins mid-function — the signature and loop header of read_acs fall outside
# the visible region. They are reconstructed below from the call sites
# read_acs(<table>, <columns>, engine); TODO confirm against the original file.
def read_acs(table, columns, engine):
    """Read an ACS table once per year and stack the results.

    Only the tail of this function is visible in this chunk: each per-year
    frame is tagged with its year, accumulated in `dfs`, and the frames are
    concatenated into a single DataFrame which is returned.
    """
    dfs = []
    # placeholder loop — the original year range and per-year read are not
    # visible in this chunk and must be restored from the source file
    for year in ():
        df = None  # placeholder — original per-year read missing
        df['year'] = year
        dfs.append(df)
    df = pd.concat(dfs)
    return df


def aggregate(df, prefix, index):
    """Simple sum-aggregation of columns starting with `prefix` over `index`.

    Groups `df` by `index` and sums every column whose name starts with
    `prefix`; columns not matching the prefix are dropped from the result.
    """
    return df.groupby(index).agg(
        {c: 'sum' for c in df.columns if c.startswith(prefix)})


if __name__ == "__main__":
    engine = util.create_engine()
    index = ['geoid', 'year']

    # race counts from ACS table C02003; values are column numbers in the table
    race_table = 'C02003'
    race_columns = {
        'race_count_total': 1,
        'race_count_white': 3,
        'race_count_black': 4,
        'race_count_asian': 6
    }
    race_agg = read_acs(race_table, race_columns, engine)
    race_agg.set_index(index, inplace=True)

    # hispanic counts from ACS table B03003
    hispanic_table = 'B03003'
    hispanic_columns = {'race_count_hispanic': 3}
    hispanic_agg = read_acs(hispanic_table, hispanic_columns, engine)
def run(self):
    """Create and return a database engine via drain's util helper."""
    engine = util.create_engine()
    return engine
#!/usr/bin/python
# Read Chicago building permits from the input schema, expand the permit type
# into indicator columns, and write the result to the aux schema.
#
# BUG FIXED: the shebang had been fused with the rest of the script on a single
# line, turning the entire script into one `#` comment — it never executed.
from drain import util
import pandas as pd
from drain import data

engine = util.create_engine()

# read tables from db; the query assembles a single address string and
# normalizes permit_type (strip a fixed 9-char prefix, lowercase, '/' -> ' ')
building_permits = pd.read_sql("select street_number || ' ' || street_direction || ' ' || street_name || ' ' || suffix as address, issue_date, lower(replace(substring(permit_type from 10), '/', ' ')) as permit_type from input.building_permits where issue_date is not null", engine)

# one indicator column per distinct permit_type value
# (drain helper; presumably one-hot encodes in place — confirm against drain.data)
data.binarize(building_permits,
              {'permit_type': building_permits.permit_type.unique()},
              all_classes=True)

db = util.PgSQLDatabase(engine)
db.to_sql(frame=building_permits, name='building_permits',
          if_exists='replace', index=False, schema='aux')