def flow(self):
    """Build a Flow that derives a 'configurations' resource from the source data.

    Groups the source resource by its taxonomy key fields, renders a
    human-readable snippet per group, attaches the (unpublishable) saved
    config, and upserts one row per source into SQL.
    """
    taxonomy = self.context.taxonomy
    key_fields = taxonomy.config['key-fields']

    # Snippet template: the taxonomy title followed by one
    # '<column title>: "{<field>}",' chunk per configured key field.
    snippet_parts = [taxonomy.title + ' עבור:']
    for field_name in key_fields:
        column = next(
            (ct for ct in taxonomy.column_types if ct['name'] == field_name),
            None,
        )
        if column is not None:
            snippet_parts.append(
                '%s: "{%s}",' % (column['title'], field_name.replace(':', '-'))
            )
    snippet_template = ' '.join(snippet_parts)

    # Colons are not valid in field names downstream, so normalize them.
    normalized_fields = [name.replace(':', '-') for name in key_fields]
    group_by_fields = ['_source'] + normalized_fields

    TARGET = 'configurations'
    saved_config = self.config._unflatten()
    # Freshly derived configurations must never be auto-published.
    saved_config.setdefault('publish', {})['allowed'] = False

    return Flow(
        duplicate(RESOURCE_NAME, TARGET),
        join_with_self(
            TARGET,
            group_by_fields,
            {name: {} for name in group_by_fields},
        ),
        add_computed_field([
            dict(operation='format', target='snippets', with_=snippet_template),
            dict(operation='constant', target='key_values', with_=None),
        ], resources=TARGET),
        add_field('config', 'object', saved_config, resources=TARGET),
        add_field('fields', type='object',
                  default=self.collate_values(normalized_fields),
                  resources=TARGET),
        # Collapse to one row per source, aggregating snippets/key values.
        join_with_self(
            TARGET,
            ['_source'],
            dict(
                source=dict(name='_source'),
                config={},
                key_values=dict(aggregate='array'),
                snippets=dict(aggregate='array'),
            )),
        set_type('source', type='string'),
        set_type('config', type='object'),
        set_type('key_values', type='array'),
        set_type('snippets', type='array'),
        set_primary_key(['source']),
        dump_to_sql(
            {TARGET: {'resource-name': TARGET, 'mode': 'update'}},
            engine=self.lazy_engine(),
        ),
    )
def prepare_locations():
    """Aggregate cached addresses into per-street location records.

    Each output row holds a street's display name and an array of
    address items (lat/lon/arnona-zone payloads); results are dumped and
    checkpointed under '_cache_locations'.
    """
    prepare_addresses()
    return DF.Flow(
        DF.load('_cache_addresses/datapackage.json'),
        # Full printable address, e.g. "Main St 12a".
        DF.add_field(
            'address', 'string',
            lambda row: '{} {}{}'.format(
                row['street_name'], row['house_number'], row['letter'] or '')),
        # Per-address payload consumed by the street-level aggregation below.
        DF.add_field(
            'item', 'object',
            lambda row: {
                'value': {
                    'lat': float(row['lat']),
                    'lon': float(row['lon']),
                    'arnona_zones': row['arnona_zones'],
                    'שם': row['address'],
                },
                'display': row['address'],
            }),
        DF.sort_rows('{house_number}'),
        DF.delete_fields([
            'house_number', 'letter', 'lat', 'lon', 'arnona_zones', 'address'
        ]),
        # One row per street, items aggregated in house-number order.
        DF.join_with_self(
            'concat', ['street_name'],
            dict(display=dict(name='street_name'),
                 items=dict(name='item', aggregate='array'))),
        DF.add_field('sort_street_address', 'string',
                     lambda row: sort_street_address(row['display'])),
        DF.sort_rows('{sort_street_address}'),
        DF.delete_fields(['sort_street_address']),
        DF.printer(),
        DF.dump_to_path('_cache_locations'),
        DF.checkpoint('_cache_locations'),
    ).results()[0][0]
def get_neighborhood_features():
    """Return neighborhood records with united geometries.

    Loads the stat-area spreadsheet, unwinds each stat area's neighborhood
    names, groups by neighborhood, and unites the member geometries into a
    single geometry per neighborhood. Results are checkpointed.
    """
    return DF.Flow(
        DF.load('neighborhoods.xlsx', name='stat-areas',
                deduplicate_headers=True),
        # Collect all non-empty 'neighborhood*' columns into one array.
        DF.add_field(
            'neighborhoods', 'array',
            lambda row: [value for key, value in row.items()
                         if value and key.startswith('neighborhood')]),
        DF.add_field('geometry', 'object',
                     lambda row: geometries[row['stat-area']]),
        DF.concatenate(
            dict(stat_area=['stat-area'], neighborhoods=[], geometry=[])),
        DF.update_resource(-1, name='stat-areas'),
        unwind_neighborhoods(),
        # Group stat areas and their geometries under each neighborhood.
        DF.join_with_self(
            'stat-areas', ['neighborhood'],
            dict(
                neighborhood=None,
                stat_areas=dict(name='stat_area', aggregate='array'),
                geometries=dict(name='geometry', aggregate='array'),
            )),
        DF.add_field('geometry', 'object',
                     lambda row: unite_geometries(row['geometries'])),
        DF.delete_fields(['geometries']),
        DF.update_resource(-1, name='neighborhoods'),
        DF.add_field(
            'properties', 'object',
            lambda row: dict(x=3, title=row['neighborhood'],
                             stat_areas=row['stat_areas'])),
        DF.delete_fields(['neighborhood', 'stat_areas']),
        DF.checkpoint('_cache_neighborhoods'),
    ).results()[0][0]
def flow(*_):
    """Build the tenders ingestion Flow.

    Concatenates the updated sources under TENDER_MAPPING, deduplicates by
    KEY (keeping the last occurrence), normalizes types/dates, and adds the
    page/contact presentation fields.
    """
    # Keep the latest record per KEY for every mapped column + tender_type.
    dedup_spec = {
        field: dict(aggregate='last')
        for field in [*TENDER_MAPPING, 'tender_type']
    }
    return DF.Flow(
        get_updated_sources(),
        DF.concatenate(fields=TENDER_MAPPING, target=dict(name='tenders')),
        DF.validate(),
        DF.filter_rows(lambda row: row['publication_id']),
        DF.add_field('tender_type', 'string',
                     lambda row: TENDER_KINDS[row['tender_type_he']],
                     **{'es:keyword': True}),
        DF.join_with_self('tenders', KEY, dedup_spec),
        DF.set_type('publication_id', type='string', transform=str),
        DF.set_type('supplier_id', type='string', transform=str),
        DF.set_type('tender_id', type='string',
                    transform=lambda v: v or 'none'),
        # Dates arrive as 'dd.mm.yyyy'; unparseable values are cleared.
        DF.set_type('.+_date', type='date', format='%d.%m.%Y',
                    on_error=DF.schema_validator.clear),
        DF.set_type('subjects', type='string',
                    transform=lambda v: ';'.join(
                        part.strip() for part in v.split(',')) if v else ''),
        # Combine claim date with its (optional) time-of-day column.
        DF.set_type('claim_date', type='datetime',
                    transform=lambda v, field_name, row:
                        datetime.datetime.combine(
                            v, row['claim_time'] or datetime.time(0))
                        if v else None),
        DF.set_type('tender_type_he', **{'es:keyword': True}),
        DF.delete_fields(['claim_time']),
        DF.add_field(
            'page_url', 'string',
            lambda row: f'https://mr.gov.il/ilgstorefront/he/p/{row["publication_id"]}'),
        DF.add_field('page_title', 'string', lambda row: row['description']),
        DF.add_field('reason', 'string', lambda row: row['regulation']),
        DF.add_field('documents', 'array', []),
        DF.add_field('contact', 'string'),
        DF.add_field('contact_email', 'string'),
        DF.validate(),
        DF.update_resource(-1, **{'dpp:streaming': True}),
        DF.printer(),
    )
def postflow(self):
    """Deduplicate the main resource by the configured primary key.

    Sorts by ORDER_BY_KEY and keeps the last row per key. The resource's
    declared primaryKey is stashed before the self-join (which rewrites the
    schema) and restored afterwards.
    """
    key_field_names = [
        name.replace(':', '-')
        for name in self.config.get(CONFIG_PRIMARY_KEY)
    ]

    def save_pks(store):
        # Capture the resource's primaryKey into `store` as the package
        # streams through.
        def step(package: PackageWrapper):
            for descriptor in package.pkg.descriptor['resources']:
                if descriptor['name'] == RESOURCE_NAME:
                    store['pk'] = descriptor['schema'].get('primaryKey', [])
            yield package.pkg
            yield from package
        return step

    def restore_pks(store):
        # Write the previously captured primaryKey back onto the schema.
        def step(package: PackageWrapper):
            for descriptor in package.pkg.descriptor['resources']:
                if descriptor['name'] == RESOURCE_NAME:
                    descriptor['schema']['primaryKey'] = store['pk']
            yield package.pkg
            yield from package
        return step

    # Keep the key fields themselves; every other field takes its value
    # from the last row in sort order.
    aggregation = {field: {} for field in key_field_names}
    aggregation['*'] = dict(aggregate='last')

    pk_store = dict(pk=[])
    return Flow(
        save_pks(pk_store),
        sort_rows(self.ORDER_BY_KEY, resources=RESOURCE_NAME),
        join_with_self(RESOURCE_NAME, key_field_names, aggregation),
        restore_pks(pk_store),
    )
def flow(*_):
    """Build the social-services aggregation Flow.

    Loads the prepared per-source datapackages, concatenates them, and
    collapses to one row per (publisher, activity) with a year-by-year
    history array.
    """
    prepare()
    yearly_fields = [
        'year', 'unit', 'subunit', 'subsubunit', 'allocated_budget',
        'num_beneficiaries'
    ]
    return DF.Flow(
        *[
            DF.load(f'tmp/{resource_name}/datapackage.json')
            for resource_name, _ in loads
        ],
        DF.concatenate(
            FIELD_MAPPING,
            dict(name='social_services', path='social_services.csv')),
        DF.sort_rows('{year}', reverse=True),
        # Snapshot of this row's yearly values; Decimals become ints so the
        # history objects serialize cleanly.
        DF.add_field(
            'history', 'object',
            lambda row: {
                key: (int(row[key])
                      if isinstance(row[key], decimal.Decimal)
                      else row[key])
                for key in yearly_fields
            }),
        DF.printer(),
        DF.join_with_self(
            'social_services', ['publisher_name', 'activity_name'],
            dict(
                publisher_name=None,
                activity_name=None,
                activity_description=dict(aggregate='set'),
                min_year=dict(name='year', aggregate='min'),
                max_year=dict(name='year', aggregate='max'),
                history=dict(aggregate='array'),
            )),
        DF.add_field('kind', 'string', 'gov_social_service'),
        DF.add_field('kind_he', 'string', 'שירות חברתי'),
        DF.update_resource(-1, **{'dpp:streaming': True}),
        DF.printer(),
    )
def test_join():
    """Exercise join (two resources) and join_with_self aggregations."""
    from dataflows import Flow, join, join_with_self, set_type, sort_rows
    from decimal import Decimal

    # (first_name, house, last_name, age) rows expanded into dicts.
    characters = [
        dict(first_name=first, house=house, last_name=last, age=age)
        for first, house, last, age in [
            ('Jaime', 'Lannister', 'Lannister', 34),
            ('Tyrion', 'Lannister', 'Lannister', 27),
            ('Cersei', 'Lannister', 'Lannister', 34),
            ('Jon', 'Stark', 'Snow', 17),
            ('Sansa', 'Stark', 'Stark', 14),
            ('Rickon', 'Stark', 'Stark', 5),
            ('Arya', 'Stark', 'Stark', 11),
            ('Bran', 'Stark', 'Stark', 10),
            ('Daenerys', 'Targaryen', 'Targaryen', 16),
        ]
    ]
    houses = [
        {'house': name}
        for name in (
            'House of Lannister',
            'House of Greyjoy',
            'House of Stark',
            'House of Targaryen',
            'House of Martell',
            'House of Tyrell',
        )
    ]

    aggregations = dict(
        max_age={'name': 'age', 'aggregate': 'max'},
        avg_age={'name': 'age', 'aggregate': 'avg'},
        representative={'name': 'first_name', 'aggregate': 'last'},
        representative_age={'name': 'age'},
        number_of_characters={'aggregate': 'count'},
        last_names={'name': 'last_name', 'aggregate': 'counters'},
    )
    res, _, _ = Flow(
        characters,
        set_type('age', type='number'),
        houses,
        join('res_1', 'House of {house}', 'res_2', '{house}',
             aggregations, full=False, source_delete=True),
    ).results()
    assert res[0] == [
        {
            'avg_age': Decimal('31.66666666666666666666666667'),
            'house': 'House of Lannister',
            'max_age': Decimal(34),
            'number_of_characters': 3,
            'representative': 'Cersei',
            'representative_age': Decimal(34),
            'last_names': [('Lannister', 3)],
        },
        {
            'avg_age': Decimal('11.4'),
            'house': 'House of Stark',
            'max_age': Decimal(17),
            'number_of_characters': 5,
            'representative': 'Bran',
            'representative_age': Decimal(10),
            'last_names': [('Stark', 4), ('Snow', 1)],
        },
        {
            'avg_age': Decimal(16),
            'house': 'House of Targaryen',
            'max_age': Decimal(16),
            'number_of_characters': 1,
            'representative': 'Daenerys',
            'representative_age': Decimal(16),
            'last_names': [('Targaryen', 1)],
        },
    ]

    # Find youngest of each house: sort by zero-padded age, keep the first
    # row per house.
    res, _, _ = Flow(
        characters,
        set_type('age', type='number'),
        sort_rows('{age:02}'),
        join_with_self('res_1', '{house}', {
            'the_house': {'name': 'house'},
            '*': {'aggregate': 'first'},
        }),
        sort_rows('{the_house}'),
    ).results()
    assert res[0] == [
        {
            'the_house': 'Lannister',
            'first_name': 'Tyrion',
            'last_name': 'Lannister',
            'age': Decimal('27'),
        },
        {
            'the_house': 'Stark',
            'first_name': 'Rickon',
            'last_name': 'Stark',
            'age': Decimal('5'),
        },
        {
            'the_house': 'Targaryen',
            'first_name': 'Daenerys',
            'last_name': 'Targaryen',
            'age': Decimal('16'),
        },
    ]
checkpoint('processed_data'), # Sort rows by date and country sort_rows('{Country/Region}{Province/State}{Date}', resources='time-series-19-covid-combined'), # Duplicate the stream to create aggregated data duplicate(source='time-series-19-covid-combined', target_name='worldwide-aggregated', target_path='data/worldwide-aggregated.csv'), join_with_self(resource_name='worldwide-aggregated', join_key=['Date'], fields=dict(Date={'name': 'Date'}, Confirmed={ 'name': 'Confirmed', 'aggregate': 'sum' }, Recovered={ 'name': 'Recovered', 'aggregate': 'sum' }, Deaths={ 'name': 'Deaths', 'aggregate': 'sum' })), update_schema('worldwide-aggregated', missingValues=['None', ''], fields=[{ "format": "%Y-%m-%d", "name": "Date", "type": "date" }, { "format": "default",
if __name__ == '__main__': r, _, _ = DF.Flow( DF.load(all_data(), name='cities', headers=1, override_fields=dict(area_id=dict(type='string')), cast_strategy=DF.load.CAST_WITH_SCHEMA), DF.filter_rows(lambda r: r['is_city']), DF.add_field('score_date', 'object', lambda r: dict( date=r['date'].isoformat(), sr=float(r['symptoms_ratio_weighted'] or 0), nr=int(r['num_reports_weighted'])) ), DF.concatenate(dict( id=[], city_name=[], score_date=[] ), target=dict(name='ranking')), DF.join_with_self('ranking', '{city_name}', dict( id=None, city_name=None, scores=dict(name='score_date', aggregate='array') )), sort_limit_scores(), DF.filter_rows(lambda r: r['scores'][-1]['nr'] >= 200), DF.add_field('sortkey', 'integer', lambda r: int(r['scores'][-1]['sr'] * 1000000) + r['scores'][-1]['nr']), DF.sort_rows('{sortkey}', reverse=True), DF.delete_fields(['sortkey']), DF.add_field('rank', 'integer', 0), DF.add_field('translations', 'object', lambda r: city_translations[r['city_name']]), DF.add_field('image', 'object', lambda r: upload_static_image(r['id'], width=280*2, height=160*2)), ranker(), ).results() rankings = r[0] r, _, _ = DF.Flow( DF.load(all_data(), name='cities', headers=1,
name='cities', headers=1, override_fields=dict(area_id=dict(type='string')), cast_strategy=DF.load.CAST_WITH_SCHEMA), DF.filter_rows(lambda r: r['is_city']), DF.add_field( 'score_date', 'object', lambda r: dict(weekday=r['date'].isoweekday() % 7, date=r['date'].toordinal(), sr=float(r['symptoms_ratio_weighted'] or 0), nr=int(r['num_reports_weighted']))), DF.concatenate(dict(id=[], city_name=[], score_date=[]), target=dict(name='popup_data')), DF.join_with_self( 'popup_data', '{city_name}', dict(id=None, city_name=None, scores=dict(name='score_date', aggregate='array'))), sort_limit_scores(), DF.filter_rows(lambda r: r['scores'] is not None), DF.add_field('nr', 'integer', lambda r: r['scores'][-1]['nr']), DF.add_field('sr', 'number', lambda r: r['scores'][-1]['sr']), split_to_weeks(), DF.add_field('translations', 'object', lambda r: city_translations[r['city_name']]), ).results() popup_data = r[0] popup_data = dict((x.pop('id'), x) for x in popup_data) upload_file( json.dumps(popup_data, cls=json_encoder, indent=2).encode('utf8'),
# Duplicate the stream to create aggregated data duplicate( source="time-series-19-covid-combined", target_name="worldwide-aggregated", target_path="data/worldwide-aggregated.csv", ), join_with_self( resource_name="worldwide-aggregated", join_key=["Date"], fields=dict( Date={"name": "Date"}, Confirmed={ "name": "Confirmed", "aggregate": "sum" }, Recovered={ "name": "Recovered", "aggregate": "sum" }, Deaths={ "name": "Deaths", "aggregate": "sum" }, ), ), printer(), update_schema( "worldwide-aggregated", missingValues=["None", ""], fields=[ {
def process_stack_demand(stack):
    """Populate `stack` with per-neighborhood demand score cards.

    The demand spreadsheet is unpivoted to (category, subcategory,
    neighborhood, demand_pct) rows, scored against each subcategory's
    maximum, and folded into one card stack per category. Results are
    cached under 'stack:demand'.
    """
    def collect_cats():
        SAMPLE_COL = 'כלל המדגם'

        def tag_rows(rows):
            # Rows carrying SAMPLE_COL are section markers: 'סך הכל <cat>'
            # opens a category, '--- <subcat>' rows belong to it. Rows
            # without the column (other resources) pass through untouched.
            current_category = None
            for row in rows:
                if SAMPLE_COL in row:
                    marker = row[SAMPLE_COL]
                    if marker.startswith('סך הכל '):
                        current_category = marker[7:]
                    elif marker.startswith('--- '):
                        # Skip the 'no breakdown' catch-all rows.
                        if not marker.endswith('ללא פירוט'):
                            row['category'] = current_category
                            row['subcategory'] = marker[4:]
                            yield row
                else:
                    yield row

        return DF.Flow(
            DF.add_field('category', 'string', resources=-1),
            DF.add_field('subcategory', 'string', resources=-1),
            tag_rows,
            DF.delete_fields([SAMPLE_COL], resources=-1),
        )

    def fix_nones(row):
        # Missing percentages count as zero demand.
        row['demand_pct'] = row['demand_pct'] or 0

    key = 'stack:demand'
    try:
        demand_stacks = _cache.get(key)
    except KeyError:
        demand_stacks = DF.Flow(
            DF.load('demand.xlsx', infer_strategy=DF.load.INFER_STRINGS,
                    headers=2),
            collect_cats(),
            DF.update_schema(-1, missingValues=['--']),
            # Columns named '<neighborhood> (X)' become one row each.
            DF.unpivot(
                unpivot_fields=[dict(
                    name='(.+) \\([A-Z]\\)',
                    keys=dict(neighborhood='\\1'),
                )],
                extra_keys=[dict(name='neighborhood', type='string')],
                extra_value=dict(name='demand_pct', type='number'),
                resources=-1,
            ),
            DF.validate(),
            DF.duplicate('demand', 'demand_stacks'),
            # Max demand per (category, subcategory) for normalization.
            DF.join_with_self('demand', ['category', 'subcategory'], dict(
                category=None,
                subcategory=None,
                max_demand=dict(name='demand_pct', aggregate='max'),
            )),
            DF.join(
                'demand', ['category', 'subcategory'],
                'demand_stacks', ['category', 'subcategory'],
                dict(max_demand=None),
            ),
            fix_nones,
            DF.add_field('display', 'string',
                         lambda r: '{:.0f}%'.format(r['demand_pct'] * 100)),
            DF.add_field('value', 'number', lambda r: r['demand_pct']),
            # Scale to a 0-6 geometry score relative to the subcategory max.
            DF.add_field('score', 'number',
                         lambda r: r['demand_pct'] / r['max_demand'] * 6),
            DF.delete_fields(['demand_pct', 'max_demand']),
            DF.sort_rows('{score}', reverse=True),
            DF.add_field('scores', 'object', lambda r: dict(
                title=r['neighborhood'],
                score_display=r['display'],
                score_value=float(r['value']),
                geometry_score=float(r['score']),
            )),
            DF.join_with_self('demand_stacks', ['category', 'subcategory'],
                              dict(
                                  category=None,
                                  subcategory=None,
                                  scores=dict(aggregate='array'),
                              )),
            DF.add_field('card', 'object', lambda r: dict(
                title='ביקוש ל{}'.format(r['subcategory']),
                content='',
                scores=r['scores'],
                test='demand__{category}__{subcategory}'.format(**r)
                     .replace(' ', '_'),
            )),
            DF.join_with_self('demand_stacks', ['category'], dict(
                category=None,
                cards=dict(name='card', aggregate='array'),
            )),
            DF.add_field('name', 'string',
                         lambda r: 'demand.{}'.format(r['category'])
                                   .replace(' ', '_')),
        ).results()[0][0]
        _cache.set(key, demand_stacks)

    matching = [s for s in demand_stacks if s['name'] == stack['name']]
    cards = matching[0]['cards']
    stack.update(dict(
        layout='scores',
        currentField='neighborhood',
        map=True,
    ))
    stack.setdefault('cards', []).extend(cards)
def process_institutions(stack):
    """Populate `stack` with one map card per institution kind.

    Institution spreadsheets are concatenated, kinds are translated to
    display names, ITM coordinates are reprojected to lon/lat, and features
    are grouped into a FeatureCollection per kind. Existing card content is
    preserved by title; cards with no matching institution kind are dropped.
    Results are cached under 'stack:institutions'.
    """
    key = 'stack:institutions'
    try:
        institutions_cards = _cache.get(key)
    except KeyError:
        # Israeli TM grid -> WGS84 (inverse projection below).
        CRS = '+ellps=GRS80 +k=1.00007 +lat_0=31.73439361111111 +lon_0=35.20451694444445 +no_defs +proj=tmerc +units=m +x_0=219529.584 +y_0=626907.39'
        projector = pyproj.Proj(CRS)

        def proj():
            def reproject(row):
                row['lon'], row['lat'] = projector(
                    row['X'], row['Y'], inverse=True)

            return DF.Flow(
                DF.add_field('lon', 'number'),
                DF.add_field('lat', 'number'),
                reproject,
                DF.delete_fields(['X', 'Y']),
            )

        def translate_kind():
            # Raw spreadsheet kind -> display kind.
            translations = {
                'מרפאה': 'מרפאות',
                'איצטדיון': 'איצטדיון',
                'ספרייה': 'ספריות',
                'בית ספר': 'בתי ספר',
                'מועדון קהילתי כולל מרכז צעירים': 'מועדון קהילתי',
                'בית כנסת': 'בתי כנסת',
                'מועדון נוער': 'מועדון נוער',
                'אולם מופעים, היכל תרבות': 'מוסדות תרבות',
                'מועדון קשישים, מרכז לאזרחים ותיקים,מרכז יום לקשישים':
                    'מרכזי פעילות לקשישים',
            }

            def apply(row):
                row['kind'] = translations[row['kind']]

            return apply

        institutions_cards = DF.Flow(
            *[DF.load(path) for path in glob.glob('institutions/*xlsx')],
            DF.concatenate(dict(
                kind=['סוג המוסד'],
                title=['שם המוסד'],
                address=['כתובת'],
                X=[],
                Y=[],
            )),
            translate_kind(),
            proj(),
            DF.add_field('feature', 'object', lambda r: geojson.Feature(
                properties=dict(title=r['title'], address=r['address']),
                geometry=geojson.Point(
                    coordinates=[float(r['lon']), float(r['lat'])]),
            )),
            DF.delete_fields(['title', 'lon', 'lat', 'address']),
            DF.join_with_self('concat', ['kind'], dict(
                title=dict(name='kind'),
                features=dict(name='feature', aggregate='array'),
            )),
            DF.sort_rows('{title}', reverse=True),
            DF.add_field('pointGeometry', 'object',
                         lambda r: geojson.FeatureCollection(
                             features=r['features'])),
            DF.add_field('content', 'string', ' '),
            DF.delete_fields(['features']),
        ).results()[0][0]
        _cache.set(key, institutions_cards)

    stack.update(dict(map=True))
    stack.setdefault('cards', [])

    # Carry over hand-written content from existing cards, matched by title.
    current_cards = {card['title']: card for card in stack['cards']}
    for card in institutions_cards:
        previous = current_cards.pop(card['title'], None)
        if previous is None:
            print('SPURIOUS CARD for INSTITUTIONS', card['title'])
        else:
            card['content'] = previous['content']
    # Keep only unmatched pre-existing cards, then append the fresh ones.
    stack['cards'] = [
        card for card in stack['cards'] if card['title'] in current_cards
    ] + institutions_cards
def process_demographics(stack):
    """Populate `stack` with demographic score cards per population group.

    Stat-area CSVs are unpivoted to (stat_id, kind, value) rows, mapped to
    the five population groups, aggregated per neighborhood, normalized to
    a 0-6 geometry score, and folded into one card per group. Results are
    cached under 'stack:demographics'.
    """
    key = 'stack:demographics'
    try:
        demographics_cards = _cache.get(key)
    except KeyError:
        def add_source():
            # Stamp each row with the name of the resource it came from so
            # map_to_cards() can distinguish the source reports.
            def stamp(rows):
                for row in rows:
                    row['source'] = rows.res.name
                    yield row

            return DF.Flow(
                DF.add_field('source', 'string'),
                stamp,
            )

        def map_to_cards():
            # (report name, column kinds) -> population-group key. Rows not
            # matching any entry are dropped.
            MAP = {
                ("דו''ח אג''ס לפי עולים וותיקים",
                 ("סה''כ עולים",)): 'immigrants',
                ("דו''ח אג''ס לפי קבוצות גיל",
                 ('0-5', '6-12')): 'kids',
                ("דו''ח אג''ס לפי קבוצות גיל",
                 ('13-17',)): 'teenagers',
                ("דו''ח אג''ס לפי קבוצות גיל",
                 ('60-64', '65-69', '70-74', '75-120')): 'elderly',
                ("דו''ח אג''ס לפי קבוצות גיל",
                 ('18-21', '22-24', '25-29', '30-34', '35-39', '40-44',
                  '45-49', '50-54', '55-59')): 'adults',
            }

            def remap(rows):
                for row in rows:
                    for (source, kinds), group in MAP.items():
                        if row['source'] == source and row['kind'] in kinds:
                            row['kind'] = group
                            yield row

            return remap

        # stat area id -> neighborhood title.
        s2n = {
            int(stat_area): feature['properties']['title']
            for feature in get_neighborhood_features()
            for stat_area in feature['properties']['stat_areas']
        }
        # group key -> (card title, card content, display order).
        MAP2 = dict(
            adults=('אוכלוסיה בוגרת', 'גברים ונשים בין גיל 18 ל-60', 0),
            kids=('ילדים', 'תינוקות וילדים עד גיל 12', 1),
            teenagers=('בני נוער', 'נערים ונערות עד גיל 18', 2),
            elderly=('הגיל השלישי', 'גברים ונשים מעל גיל 60', 3),
            immigrants=('עולים לישראל', 'תושבים שאינם ילידי ישראל', 4),
        )

        demographics_cards = DF.Flow(
            *[DF.load(path, headers=4)
              for path in glob.glob('demographics/*.csv')],
            DF.add_field('stat_id', 'string', lambda r: r["אג''ס"]),
            DF.add_field('total', 'number', lambda r: r.get("סה''כ")),
            DF.delete_fields(["אג''ס", "סה''כ "]),
            DF.unpivot(
                [dict(name="([-'א-ת0-9 ].+)", keys=dict(kind=r'\1'))],
                [dict(name='kind', type='string')],
                dict(name='value', type='number')),
            DF.validate(),
            add_source(),
            map_to_cards(),
            DF.concatenate(dict(total=[], value=[], kind=[], stat_id=[])),
            DF.add_field('neighborhood', 'string',
                         lambda r: s2n.get(int(r['stat_id']))),
            # Drop stat areas with no known neighborhood.
            DF.filter_rows(lambda r: r['neighborhood']),
            DF.join_with_self('concat', ['neighborhood', 'kind'], dict(
                neighborhood=None,
                kind=None,
                total=dict(aggregate='sum'),
                value=dict(aggregate='sum'),
            )),
            DF.duplicate('concat', 'maxes'),
            DF.join_with_self('concat', ['neighborhood'],
                              dict(neighborhood=None, total=None)),
            DF.join('concat', ['neighborhood'], 'maxes', ['neighborhood'],
                    dict(total=None)),
            DF.add_field('score_value', 'number',
                         lambda r: r['value']),  # /r['total']
            DF.sort_rows('{score_value}', reverse=True),
            DF.duplicate('maxes', 'demographics'),
            # Per-kind maximum, used to normalize geometry scores to 0-6.
            DF.join_with_self('maxes', ['kind'],
                              dict(kind=None,
                                   max_value=dict(name='score_value',
                                                  aggregate='max'))),
            DF.join('maxes', ['kind'], 'demographics', ['kind'],
                    dict(max_value=None)),
            DF.add_field('geometry_score', 'number',
                         lambda r: 6 * r['score_value'] / r['max_value']),
            DF.add_field('score_display', 'string',
                         lambda r: '{:,} ({:.0f}%)'.format(
                             r['value'],
                             100 * r['score_value'] / r['total'])),
            DF.add_field('scores', 'object', lambda r: dict(
                title=r['neighborhood'],
                score_value=float(r['score_value']),
                score_display=r['score_display'],
                geometry_score=float(r['geometry_score']),
            )),
            DF.join_with_self('demographics', ['kind'], dict(
                kind=None,
                scores=dict(aggregate='array'),
            )),
            DF.add_field('title', 'string', lambda r: MAP2[r['kind']][0]),
            DF.add_field('content', 'string', lambda r: MAP2[r['kind']][1]),
            DF.add_field('order', 'integer', lambda r: MAP2[r['kind']][2]),
            DF.sort_rows('{order}'),
            DF.delete_fields(['kind']),
        ).results()[0][0]
        _cache.set(key, demographics_cards)

    stack.update(dict(
        map=True,
        scheme='green',
        currentField='neighborhood',
        layout='scores',
    ))
    stack.setdefault('cards', []).extend(demographics_cards)
DF.load(latest_file(), name='out', override_fields=dict(area_id=dict(type='string')), cast_strategy=DF.load.CAST_WITH_SCHEMA), DF.add_field('city_area_id', 'string', lambda r: r['area_id'].split('-')[0]), DF.join('cities', ['city_area_id'], 'out', ['city_area_id'], dict(num_city_reports=dict(name='num_reports_weighted'))), DF.add_field('desc', 'string', ''), DF.add_field('kind', 'string', ''), DF.add_field('property', 'string', ''), props(), DF.join_with_self( 'out', ['is_city', 'kind', 'desc', 'property'], dict(is_city=None, kind=None, desc=None, property=None, id=dict(aggregate='array'))), ).results() for item in r[0]: print('bucket for {} {} {}: {}'.format( 'city' if item['is_city'] else 'neighborhood', item['kind'], item['desc'], len(item['id']))) if item['property'] is None: continue if item['kind'] == 'fill': if item['is_city']: city_fill_color_cases.extend( [['in', ['get', 'id'], ['literal', item['id']]],
"title": "Cumulative total deaths to date", "type": "integer" }]), checkpoint('processed_data'), # Duplicate the stream to create aggregated data duplicate(source='time-series-19-covid-combined', target_name='worldwide-aggregated', target_path='worldwide-aggregated.csv'), join_with_self(resource_name='worldwide-aggregated', join_key=['Date'], fields=dict(Date={'name': 'Date'}, Confirmed={ 'name': 'Confirmed', 'aggregate': 'sum' }, Recovered={ 'name': 'Recovered', 'aggregate': 'sum' }, Deaths={ 'name': 'Deaths', 'aggregate': 'sum' })), update_schema('worldwide-aggregated', fields=[{ "format": "%Y-%m-%d", "name": "Date", "type": "date" }, { "format": "default", "groupChar": "",