def test_update_schema():
    """Run a two-row flow with ``missingValues=['-']`` and check the output.

    The flow is fed two inline rows, passed through ``update_schema``,
    ``validate`` and ``printer``; the '-' cell of the first row is expected
    to come back as ``None`` while the ``0`` cell of the second row is kept.
    """
    from dataflows import Flow, printer, update_schema, validate

    source_rows = [['a', '-'], ['a', 0]]
    expected_rows = [
        {'col0': 'a', 'col1': None},
        {'col0': 'a', 'col1': 0},
    ]

    pipeline = Flow(
        source_rows,
        update_schema(-1, missingValues=['-']),
        validate(),
        printer(),
    )
    resources, package, _stats = pipeline.results()

    # Kept from the original test: dumps the resulting descriptor to stdout.
    print(package.descriptor)

    assert resources[0] == expected_rows
def process_stack_demand(stack):
    """Fill ``stack`` in place with the demand score cards matching its name.

    The per-category card lists are produced from ``demand.xlsx`` by a
    dataflows pipeline and stored in ``_cache`` under ``'stack:demand'``;
    when the cache already holds that key the pipeline is not run.

    Args:
        stack: mutable mapping with at least a ``'name'`` key. ``layout``,
            ``currentField`` and ``map`` are set on it, and the matching
            cards are appended to ``stack['cards']`` (created if missing).

    Returns:
        None. ``stack`` is modified in place.

    Raises:
        IndexError: if no computed entry has the same ``name`` as ``stack``.
    """

    def collect_cats():
        """Return a sub-flow that tags rows with category and subcategory."""
        F = 'כלל המדגם'  # label column ("entire sample"); dropped afterwards

        # NOTE(review): dataflows is understood to choose between per-row and
        # per-resource processing from the parameter name, so keep ``rows``
        # here (and ``row`` in ``fix_nones``) -- confirm before renaming.
        def f(rows):
            cat = None
            for row in rows:
                if F in row:
                    v = row[F]
                    if v.startswith('סך הכל '):
                        # "Total <category>" row: remember the category for
                        # the rows that follow; the row itself is dropped.
                        cat = v[7:]
                    elif v.startswith('--- '):
                        # "--- <subcategory>" row; rows whose label ends with
                        # "without detail" are dropped.
                        if not v.endswith('ללא פירוט'):
                            subcat = v[4:]
                            row['category'] = cat
                            row['subcategory'] = subcat
                            yield row
                else:
                    # Rows that lack the label column are passed through.
                    yield row

        return DF.Flow(
            DF.add_field('category', 'string', resources=-1),
            DF.add_field('subcategory', 'string', resources=-1),
            f,
            DF.delete_fields([F], resources=-1),
        )

    def fix_nones(row):
        """Replace a missing (or otherwise falsy) ``demand_pct`` with 0."""
        row['demand_pct'] = row['demand_pct'] or 0

    def score(r):
        """Return demand relative to the group's maximum demand, times 6.

        A group whose maximum is missing or zero has nothing to scale
        against; it scores 0 instead of raising on the division.
        """
        max_demand = r['max_demand']
        if not max_demand:
            return 0
        return r['demand_pct'] / max_demand * 6

    key = 'stack:demand'
    try:
        demand_stacks = _cache.get(key)
    except KeyError:
        # Cache miss: build the per-category card lists from the spreadsheet.
        demand_stacks = DF.Flow(
            DF.load('demand.xlsx', infer_strategy=DF.load.INFER_STRINGS, headers=2),
            collect_cats(),
            # '--' cells are treated as missing values.
            DF.update_schema(-1, missingValues=['--']),
            # Columns named "<neighborhood> (<capital letter>)" become one
            # row each, with the cell value stored in ``demand_pct``.
            DF.unpivot(
                unpivot_fields=[dict(
                    name='(.+) \\([A-Z]\\)',
                    keys=dict(
                        neighborhood='\\1'
                    ),
                )],
                extra_keys=[dict(
                    name='neighborhood', type='string'
                )],
                extra_value=dict(
                    name='demand_pct', type='number'
                ),
                resources=-1
            ),
            DF.validate(),
            # Keep a row-level copy, then reduce 'demand' to the maximum
            # demand per (category, subcategory) and join it back.
            DF.duplicate('demand', 'demand_stacks'),
            DF.join_with_self('demand', ['category', 'subcategory'], dict(
                category=None,
                subcategory=None,
                max_demand=dict(name='demand_pct', aggregate='max')
            )),
            DF.join(
                'demand', ['category', 'subcategory'],
                'demand_stacks', ['category', 'subcategory'],
                dict(
                    max_demand=None
                )
            ),
            fix_nones,
            DF.add_field('display', 'string', lambda r: '{:.0f}%'.format(r['demand_pct'] * 100)),
            DF.add_field('value', 'number', lambda r: r['demand_pct']),
            DF.add_field('score', 'number', score),
            DF.delete_fields(['demand_pct', 'max_demand']),
            DF.sort_rows('{score}', reverse=True),
            DF.add_field('scores', 'object', lambda r: dict(
                title=r['neighborhood'],
                score_display=r['display'],
                score_value=float(r['value']),
                geometry_score=float(r['score']),
            )),
            # One row per (category, subcategory) holding all its scores.
            DF.join_with_self('demand_stacks', ['category', 'subcategory'], dict(
                category=None,
                subcategory=None,
                scores=dict(aggregate='array'),
            )),
            DF.add_field('card', 'object', lambda r: dict(
                title='ביקוש ל{}'.format(r['subcategory']),  # "Demand for <subcategory>"
                content='',
                scores=r['scores'],
                test='demand__{category}__{subcategory}'.format(**r).replace(' ', '_')
            )),
            # One row per category holding all its cards.
            DF.join_with_self('demand_stacks', ['category'], dict(
                category=None,
                cards=dict(name='card', aggregate='array'),
            )),
            DF.add_field('name', 'string', lambda r: 'demand.{}'.format(r['category']).replace(' ', '_')),
        ).results()[0][0]
        _cache.set(key, demand_stacks)

    # Indexing the filtered list raises IndexError when no entry matches.
    cards = [s for s in demand_stacks if s['name'] == stack['name']][0]['cards']
    stack.update(dict(
        layout='scores',
        currentField='neighborhood',
        map=True
    ))
    stack.setdefault('cards', []).extend(cards)
# Schema settings for 'time-series-19-covid-combined' (presumably the name of
# the resource the call targets -- confirm against the enclosing pipeline):
# the strings 'None' and '' are declared as missing values, and eight field
# descriptors are supplied.
update_schema('time-series-19-covid-combined', missingValues=['None', ''], fields=[{
    "format": "%Y-%m-%d",
    "name": "Date",
    "type": "date"
}, {
    "format": "default",
    "name": "Country/Region",
    "type": "string"
}, {
    "format": "default",
    "name": "Province/State",
    "type": "string"
}, {
    "decimalChar": ".",
    "format": "default",
    "groupChar": "",
    "name": "Lat",
    "type": "number"
}, {
    "decimalChar": ".",
    "format": "default",
    "groupChar": "",
    "name": "Long",
    "type": "number"
}, {
    # The three counters below are integers with a "Cumulative total ..." title.
    "format": "default",
    "groupChar": "",
    "name": "Confirmed",
    "title": "Cumulative total confirmed cases to date",
    "type": "integer"
}, {
    "format": "default",
    "groupChar": "",
    "name": "Recovered",
    "title": "Cumulative total recovered cases to date",
    "type": "integer"
}, {
    "format": "default",
    "groupChar": "",
    "name": "Deaths",
    "title": "Cumulative total deaths to date",
    "type": "integer"
}]),
# Schema settings for 'worldwide-aggregated' (presumably the name of the
# resource the call targets -- confirm against the enclosing pipeline).
# NOTE(review): no missingValues are passed here, unlike the call for
# 'time-series-19-covid-combined' -- confirm this is intentional.
# NOTE(review): the field list is the same as the one given for
# 'time-series-19-covid-combined', including Country/Region, Province/State,
# Lat and Long -- verify the aggregated resource really has these columns.
update_schema('worldwide-aggregated', fields=[{
    "format": "%Y-%m-%d",
    "name": "Date",
    "type": "date"
}, {
    "format": "default",
    "name": "Country/Region",
    "type": "string"
}, {
    "format": "default",
    "name": "Province/State",
    "type": "string"
}, {
    "decimalChar": ".",
    "format": "default",
    "groupChar": "",
    "name": "Lat",
    "type": "number"
}, {
    "decimalChar": ".",
    "format": "default",
    "groupChar": "",
    "name": "Long",
    "type": "number"
}, {
    # The three counters below are integers with a "Cumulative total ..." title.
    "format": "default",
    "groupChar": "",
    "name": "Confirmed",
    "title": "Cumulative total confirmed cases to date",
    "type": "integer"
}, {
    "format": "default",
    "groupChar": "",
    "name": "Recovered",
    "title": "Cumulative total recovered cases to date",
    "type": "integer"
}, {
    "format": "default",
    "groupChar": "",
    "name": "Deaths",
    "title": "Cumulative total deaths to date",
    "type": "integer"
}]),