Ejemplo n.º 1
0
def test_update_schema():
    """Check that `update_schema(..., missingValues=['-'])` nulls out '-' cells.

    Two rows are fed through a flow whose schema update declares '-' as a
    missing value; after validation the first row's second column is expected
    to be None while the second row keeps its 0.
    """
    from dataflows import Flow, printer, update_schema, validate

    source_rows = [['a', '-'], ['a', 0]]
    pipeline = Flow(
        source_rows,
        update_schema(-1, missingValues=['-']),
        validate(),
        printer(),
    )
    results, dp, _stats = pipeline.results()

    # Dump the resulting package descriptor to make failures easier to read.
    print(dp.descriptor)

    expected_rows = [
        {'col0': 'a', 'col1': None},
        {'col0': 'a', 'col1': 0},
    ]
    assert results[0] == expected_rows
def process_stack_demand(stack):
    """Attach the demand score cards matching ``stack['name']`` to ``stack``.

    The per-category demand stacks are read from ``_cache`` under the key
    'stack:demand'. When the cache raises ``KeyError`` they are rebuilt from
    'demand.xlsx' through a dataflows pipeline and stored back in the cache.

    Args:
        stack: mapping with a 'name' entry. It is updated in place: 'layout',
            'currentField' and 'map' are set, and the matching cards are
            appended to its 'cards' list (created when absent).

    Raises:
        IndexError: when no demand stack has the same name as ``stack``.
    """

    def collect_cats():
        """Build the sub-flow that tags rows with category / subcategory."""
        header = 'כלל המדגם'

        # NOTE(review): the single parameter is deliberately still named
        # `rows`; dataflows appears to pick the processor kind from that
        # name - confirm before renaming.
        def tag_rows(rows):
            current_category = None
            for row in rows:
                # Rows without the header column pass through unchanged.
                if header not in row:
                    yield row
                    continue

                label = row[header]
                if label.startswith('סך הכל '):
                    # Total line: remember the category, emit nothing.
                    current_category = label[7:]
                    continue

                # Sub-category line, unless it carries the excluded suffix.
                if label.startswith('--- ') and not label.endswith('ללא פירוט'):
                    row['category'] = current_category
                    row['subcategory'] = label[4:]
                    yield row

        return DF.Flow(
            DF.add_field('category', 'string', resources=-1),
            DF.add_field('subcategory', 'string', resources=-1),
            tag_rows,
            DF.delete_fields([header], resources=-1),
        )

    # NOTE(review): parameter name `row` kept for the same reason as `rows`
    # above.
    def fix_nones(row):
        """Replace a falsy 'demand_pct' with 0."""
        if not row['demand_pct']:
            row['demand_pct'] = 0

    def score_entry(r):
        """Per-neighborhood score record stored in the 'scores' field."""
        return {
            'title': r['neighborhood'],
            'score_display': r['display'],
            'score_value': float(r['value']),
            'geometry_score': float(r['score']),
        }

    def card_entry(r):
        """Per-subcategory card stored in the 'card' field."""
        return {
            'title': 'ביקוש ל{}'.format(r['subcategory']),
            'content': '',
            'scores': r['scores'],
            'test': 'demand__{category}__{subcategory}'.format(**r).replace(' ', '_'),
        }

    def build_demand_stacks():
        """Run the full pipeline and return the rows of its first resource."""
        pipeline = DF.Flow(
            DF.load('demand.xlsx', infer_strategy=DF.load.INFER_STRINGS, headers=2),
            collect_cats(),
            DF.update_schema(-1, missingValues=['--']),
            # Columns named "<neighborhood> (<letter>)" become rows keyed by
            # neighborhood, with the cell value stored in 'demand_pct'.
            DF.unpivot(
                unpivot_fields=[{
                    'name': '(.+) \\([A-Z]\\)',
                    'keys': {'neighborhood': '\\1'},
                }],
                extra_keys=[{'name': 'neighborhood', 'type': 'string'}],
                extra_value={'name': 'demand_pct', 'type': 'number'},
                resources=-1,
            ),
            DF.validate(),
            # Keep a copy of the rows, reduce 'demand' to the max demand per
            # (category, subcategory), then join that max onto the copy.
            DF.duplicate('demand', 'demand_stacks'),
            DF.join_with_self('demand', ['category', 'subcategory'], {
                'category': None,
                'subcategory': None,
                'max_demand': {'name': 'demand_pct', 'aggregate': 'max'},
            }),
            DF.join(
                'demand', ['category', 'subcategory'],
                'demand_stacks', ['category', 'subcategory'],
                {'max_demand': None},
            ),
            fix_nones,
            DF.add_field('display', 'string',
                         lambda r: '{:.0f}%'.format(r['demand_pct'] * 100)),
            DF.add_field('value', 'number', lambda r: r['demand_pct']),
            # Score is the demand relative to the group's maximum, times 6.
            DF.add_field('score', 'number',
                         lambda r: r['demand_pct'] / r['max_demand'] * 6),
            DF.delete_fields(['demand_pct', 'max_demand']),
            DF.sort_rows('{score}', reverse=True),
            DF.add_field('scores', 'object', score_entry),
            # Collect the score records per (category, subcategory) ...
            DF.join_with_self('demand_stacks', ['category', 'subcategory'], {
                'category': None,
                'subcategory': None,
                'scores': {'aggregate': 'array'},
            }),
            DF.add_field('card', 'object', card_entry),
            # ... and then the cards per category.
            DF.join_with_self('demand_stacks', ['category'], {
                'category': None,
                'cards': {'name': 'card', 'aggregate': 'array'},
            }),
            DF.add_field('name', 'string',
                         lambda r: 'demand.{}'.format(r['category']).replace(' ', '_')),
        )
        return pipeline.results()[0][0]

    key = 'stack:demand'
    try:
        demand_stacks = _cache.get(key)
    except KeyError:
        # Cache miss: rebuild from the spreadsheet and remember the result.
        demand_stacks = build_demand_stacks()
        _cache.set(key, demand_stacks)

    matching = [s for s in demand_stacks if s['name'] == stack['name']]
    cards = matching[0]['cards']

    stack.update({
        'layout': 'scores',
        'currentField': 'neighborhood',
        'map': True,
    })
    stack.setdefault('cards', []).extend(cards)
Ejemplo n.º 3
0
 # Schema declaration for the resource named 'time-series-19-covid-combined':
 # the strings 'None' and '' are listed as missing values, and eight fields
 # (Date, Country/Region, Province/State, Lat, Long, Confirmed, Recovered,
 # Deaths) are described with their type and format.
 # NOTE(review): the leading indent and trailing comma suggest this is one
 # argument of an enclosing call that is not visible here - confirm.
 update_schema('time-series-19-covid-combined',
               missingValues=['None', ''],
               fields=[{
                   # Dates are given as year-month-day.
                   "format": "%Y-%m-%d",
                   "name": "Date",
                   "type": "date"
               }, {
                   "format": "default",
                   "name": "Country/Region",
                   "type": "string"
               }, {
                   "format": "default",
                   "name": "Province/State",
                   "type": "string"
               }, {
                   # Coordinates: '.' as decimal mark, no grouping character.
                   "decimalChar": ".",
                   "format": "default",
                   "groupChar": "",
                   "name": "Lat",
                   "type": "number"
               }, {
                   "decimalChar": ".",
                   "format": "default",
                   "groupChar": "",
                   "name": "Long",
                   "type": "number"
               }, {
                   # The three counters below are integers with a descriptive title.
                   "format": "default",
                   "groupChar": "",
                   "name": "Confirmed",
                   "title": "Cumulative total confirmed cases to date",
                   "type": "integer"
               }, {
                   "format": "default",
                   "groupChar": "",
                   "name": "Recovered",
                   "title": "Cumulative total recovered cases to date",
                   "type": "integer"
               }, {
                   "format": "default",
                   "groupChar": "",
                   "name": "Deaths",
                   "title": "Cumulative total deaths to date",
                   "type": "integer"
               }]),
Ejemplo n.º 4
0
 # Schema declaration for the resource named 'worldwide-aggregated': eight
 # fields (Date, Country/Region, Province/State, Lat, Long, Confirmed,
 # Recovered, Deaths) are described with their type and format. No
 # missingValues are passed in this call.
 # NOTE(review): the leading indent and trailing comma suggest this is one
 # argument of an enclosing call that is not visible here - confirm.
 # NOTE(review): the field list includes per-location columns
 # (Country/Region, Province/State, Lat, Long) - verify that a worldwide
 # aggregate resource actually carries them.
 update_schema('worldwide-aggregated',
               fields=[{
                   # Dates are given as year-month-day.
                   "format": "%Y-%m-%d",
                   "name": "Date",
                   "type": "date"
               }, {
                   "format": "default",
                   "name": "Country/Region",
                   "type": "string"
               }, {
                   "format": "default",
                   "name": "Province/State",
                   "type": "string"
               }, {
                   # Coordinates: '.' as decimal mark, no grouping character.
                   "decimalChar": ".",
                   "format": "default",
                   "groupChar": "",
                   "name": "Lat",
                   "type": "number"
               }, {
                   "decimalChar": ".",
                   "format": "default",
                   "groupChar": "",
                   "name": "Long",
                   "type": "number"
               }, {
                   # The three counters below are integers with a descriptive title.
                   "format": "default",
                   "groupChar": "",
                   "name": "Confirmed",
                   "title": "Cumulative total confirmed cases to date",
                   "type": "integer"
               }, {
                   "format": "default",
                   "groupChar": "",
                   "name": "Recovered",
                   "title": "Cumulative total recovered cases to date",
                   "type": "integer"
               }, {
                   "format": "default",
                   "groupChar": "",
                   "name": "Deaths",
                   "title": "Cumulative total deaths to date",
                   "type": "integer"
               }]),