def race_and_age(data):
    """Print median years in prison broken down by race and age cohort.

    Reads the table stored under ``data['with_years_in_prison']``, drops rows
    whose ``age`` is ``None``, groups the rest by ``race`` and then by a
    decade label derived from ``age`` ('20s', '30s', ...), and prints at most
    ten groups ordered by descending median ``years_in_prison``, alongside
    each group's row count.

    Aggregations use the ``(new_column_name, aggregation)`` pair form, the
    same form the other ``aggregate`` calls in this file use.
    """
    # Rows without an age cannot be assigned to a cohort.
    only_with_age = data['with_years_in_prison'].where(
        lambda r: r['age'] is not None
    )

    # Group by race, then sub-group by age cohort (20s, 30s, etc.).
    race_groups = only_with_age.group_by('race')
    race_and_age_groups = race_groups.group_by(
        lambda r: '%i0s' % (r['age'] // 10),
        key_name='age_group'
    )

    # One output row per (race, age_group) with its size and median.
    medians = race_and_age_groups.aggregate([
        ('count', agate.Length()),
        ('median_years_in_prison', agate.Median('years_in_prison')),
    ])

    # Highest medians first; only the top rows are printed.
    sorted_groups = medians.order_by('median_years_in_prison', reverse=True)
    sorted_groups.print_table(max_rows=10)
def main():
    """Summarise ``data.csv`` by year and export its located rows as GeoJSON.

    Steps:
      1. Load ``data.csv`` and print its (column name, column type) pairs.
      2. Add ``year`` (from the ``Date`` column), ``lat`` and ``lng`` columns
         and write the result to ``with_decimals.csv``.
      3. Print row counts per year (CSV) and per year / has-coordinates
         (table).
      4. Write ``src/data/attacks.json``: a mapping of year to a list of
         GeoJSON points for every row that has both coordinates.

    NOTE(review): ``DecimalDegrees`` is defined elsewhere; this assumes it
    yields ``None`` when a coordinate is unavailable, as the ``is not None``
    checks below imply. Confirm against its definition.
    """
    table = agate.Table.from_csv('data.csv')

    print(list(zip(table.column_names, table.column_types)))

    with_decimals = table.compute([
        ('year', agate.Formula(agate.Text(), lambda r: r['Date'].year)),
        ('lat', DecimalDegrees('Latitude')),
        ('lng', DecimalDegrees('Longitude')),
    ])
    with_decimals.to_csv('with_decimals.csv')

    by_year = with_decimals.group_by('year')
    by_usable = by_year.group_by(
        lambda r: r['lat'] is not None and r['lng'] is not None)

    by_year.aggregate([('count', agate.Length())]).print_csv()
    by_usable.aggregate([('count', agate.Length())]).print_table()

    data = {}
    for year, year_table in by_year.items():
        # Test against None rather than truthiness: a coordinate of exactly 0
        # is falsy but valid, and this keeps the export in agreement with the
        # "usable" grouping above.
        data[year] = [
            # Points are built as [lng, lat], in that order.
            geojson.Point([float(row['lng']), float(row['lat'])])
            for row in year_table.rows
            if row['lat'] is not None and row['lng'] is not None
        ]

    with open('src/data/attacks.json', 'w') as f:
        geojson.dump(data, f, sort_keys=True)
def by_country(data):
    """Count rows per ``'Home country*'`` value and store the result.

    Groups ``data['table']`` by its ``'Home country*'`` column, aggregates
    each group's row count into a ``failed_banks`` column, and saves the
    aggregated table under ``data['failed_by_country']``. Nothing is
    returned; ``data`` is updated in place.
    """
    grouped = data['table'].group_by('Home country*')
    row_counts = grouped.aggregate([('failed_banks', agate.Length())])
    data['failed_by_country'] = row_counts