load(f'{BASE_URL}{CONFIRMED}'), load(f'{BASE_URL}{RECOVERED}'), load(f'{BASE_URL}{DEATH}'), load(f'{BASE_URL}{CONFIRMED_US}'), load(f'{BASE_URL}{DEATH_US}'), checkpoint('load_data'), unpivot(unpivoting_fields, extra_keys, extra_value), find_replace([{ 'name': 'Date', 'patterns': [{ 'find': '/', 'replace': '-' }] }]), to_normal_date, set_type('Date', type='date', format='%d-%m-%y', resources=None), set_type('Case', type='number', resources=None), join(source_name='time_series_covid19_confirmed_global', source_key=['Province/State', 'Country/Region', 'Date'], source_delete=True, target_name='time_series_covid19_deaths_global', target_key=['Province/State', 'Country/Region', 'Date'], fields=dict(Confirmed={ 'name': 'Case', 'aggregate': 'first' })), join(source_name='time_series_covid19_recovered_global', source_key=['Province/State', 'Country/Region', 'Date'], source_delete=True, target_name='time_series_covid19_deaths_global', target_key=['Province/State', 'Country/Region', 'Date'],
"series": [ "Exchange Rate" ] } } ], version="0.2.0", readme=readme() ), extract_exchange_rates('daily'), extract_exchange_rates('monthly'), extract_exchange_rates('annual'), update_resource('res_1', **{'name': 'daily', 'path':'data/daily.csv', 'dpp:streaming': True}), update_resource('res_2', **{'name': 'monthly', 'path':'data/monthly.csv', 'dpp:streaming': True}), update_resource('res_3', **{'name': 'annual', 'path':'data/annual.csv', 'dpp:streaming': True}), set_type('Date', resources='daily', type='date', description="Date in ISO format"), set_type('Country', resources='daily', type='string', description="Name of a country"), set_type('Exchange rate', resources='daily', type='number', description="Foreign Exchange Rate to USD. Only AUD, IEP, NZD, GBP and EUR to USD."), set_type('Date', resources='monthly', type='date', description="Date in ISO format"), set_type('Country', resources='monthly', type='string', description="Name of a country"), set_type('Exchange rate', resources='monthly', type='number', description="Foreign Exchange Rate to USD. Only AUD, IEP, NZD, GBP and EUR to USD."), set_type('Date', resources='annual', type='date', description="Date in ISO format"), set_type('Country', resources='annual', type='string', description="Name of a country"), set_type('Exchange rate', resources='annual', type='number', description="Foreign Exchange Rate to USD. Only AUD, IEP, NZD, GBP and EUR to USD."), validate() ) def flow(parameters, datapackage, resources, stats): return exchange_rate_flow
def test_join():
    """Exercise join() and join_with_self() on a Game-of-Thrones fixture.

    First joins a characters resource into a houses resource with several
    aggregates; then uses join_with_self() to pick the youngest member of
    each house.
    """
    from dataflows import Flow, join, join_with_self, set_type, sort_rows
    from decimal import Decimal

    # Source rows: one record per character.
    characters = [
        {'first_name': 'Jaime', 'house': 'Lannister', 'last_name': 'Lannister', 'age': 34},
        {'first_name': 'Tyrion', 'house': 'Lannister', 'last_name': 'Lannister', 'age': 27},
        {'first_name': 'Cersei', 'house': 'Lannister', 'last_name': 'Lannister', 'age': 34},
        {'first_name': 'Jon', 'house': 'Stark', 'last_name': 'Snow', 'age': 17},
        {'first_name': 'Sansa', 'house': 'Stark', 'last_name': 'Stark', 'age': 14},
        {'first_name': 'Rickon', 'house': 'Stark', 'last_name': 'Stark', 'age': 5},
        {'first_name': 'Arya', 'house': 'Stark', 'last_name': 'Stark', 'age': 11},
        {'first_name': 'Bran', 'house': 'Stark', 'last_name': 'Stark', 'age': 10},
        {'first_name': 'Daenerys', 'house': 'Targaryen', 'last_name': 'Targaryen', 'age': 16},
    ]
    # Target rows: the houses the characters are joined into.
    houses = [
        {'house': 'House of Lannister'},
        {'house': 'House of Greyjoy'},
        {'house': 'House of Stark'},
        {'house': 'House of Targaryen'},
        {'house': 'House of Martell'},
        {'house': 'House of Tyrell'},
    ]

    aggregations = {
        'max_age': {'name': 'age', 'aggregate': 'max'},
        'avg_age': {'name': 'age', 'aggregate': 'avg'},
        'representative': {'name': 'first_name', 'aggregate': 'last'},
        'representative_age': {'name': 'age'},
        'number_of_characters': {'aggregate': 'count'},
        'last_names': {'name': 'last_name', 'aggregate': 'counters'},
    }
    joined, _, _ = Flow(
        characters,
        set_type('age', type='number'),
        houses,
        join('res_1', 'House of {house}', 'res_2', '{house}',
             aggregations, False, True)
    ).results()

    assert joined[0] == [
        {
            'avg_age': Decimal('31.66666666666666666666666667'),
            'house': 'House of Lannister',
            'max_age': Decimal(34),
            'number_of_characters': 3,
            'representative': 'Cersei',
            'representative_age': Decimal(34),
            'last_names': [('Lannister', 3)],
        },
        {
            'avg_age': Decimal('11.4'),
            'house': 'House of Stark',
            'max_age': Decimal(17),
            'number_of_characters': 5,
            'representative': 'Bran',
            'representative_age': Decimal(10),
            'last_names': [('Stark', 4), ('Snow', 1)],
        },
        {
            'avg_age': Decimal(16),
            'house': 'House of Targaryen',
            'max_age': Decimal(16),
            'number_of_characters': 1,
            'representative': 'Daenerys',
            'representative_age': Decimal(16),
            'last_names': [('Targaryen', 1)],
        },
    ]

    # Find youngest of each house
    youngest, _, _ = Flow(
        characters,
        set_type('age', type='number'),
        sort_rows('{age:02}'),
        join_with_self('res_1', '{house}', {
            'the_house': {'name': 'house'},
            '*': {'aggregate': 'first'},
        }),
        sort_rows('{the_house}')
    ).results()

    assert youngest[0] == [
        {'the_house': 'Lannister', 'first_name': 'Tyrion',
         'last_name': 'Lannister', 'age': Decimal('27')},
        {'the_house': 'Stark', 'first_name': 'Rickon',
         'last_name': 'Stark', 'age': Decimal('5')},
        {'the_house': 'Targaryen', 'first_name': 'Daenerys',
         'last_name': 'Targaryen', 'age': Decimal('16')},
    ]
update_resource('brent-annual', **{ 'path': 'data/brent-annual.csv', 'dpp:streaming': True }), update_resource('wti-daily', **{ 'path': 'data/wti-daily.csv', 'dpp:streaming': True }), update_resource('wti-weekly', **{ 'path': 'data/wti-weekly.csv', 'dpp:streaming': True }), update_resource('wti-monthly', **{ 'path': 'data/wti-monthly.csv', 'dpp:streaming': True }), update_resource('wti-annual', **{ 'path': 'data/wti-annual.csv', 'dpp:streaming': True }), format_date, remove_empty_rows, set_type('Date', resources=None, type='date', format='any'), validate(), dump_to_path('data')) def flow(parameters, datapackage, resources, stats): return oil_prices if __name__ == '__main__': oil_prices.process()
def flow(parameters, *_):
    """Build a dataflows Flow that fetches, normalizes and dedups tender rows.

    ``parameters`` must contain 'tender_type' and 'resource'; it may contain
    'filter-out' (a phrase used to drop unwanted rows).  NOTE(review):
    ``parameters.pop('resource')`` mutates the caller's dict — confirm callers
    do not reuse it afterwards.
    """

    def take_first(field):
        # Some source fields arrive as lists; keep only the first element
        # and force the field to a plain string type.
        def f(row):
            if field in row and isinstance(row[field], list):
                row[field] = row[field][0]
        return Flow(
            f,
            set_type(field, type='string'),
        )

    def datetime_to_date(field):
        # Truncate a datetime value to its date part and retype the field.
        # NOTE(review): assumes the value is a datetime (set_type with
        # DATE_FMT ran earlier) and never None — verify upstream guarantees.
        def f(row):
            if field in row:
                row[field] = row[field].date()
        return Flow(
            f,
            set_type(field, type='date'),
        )

    def approve(parameters):
        # Row predicate: keep every row unless the configured 'filter-out'
        # phrase appears in the page title or description.
        def func(row):
            if parameters.get('filter-out') is None:
                return True
            bad_phrase = parameters['filter-out']
            for f in ('page_title', 'description'):
                if row.get(f) and bad_phrase in row[f]:
                    return False
            return True
        return func

    return Flow(
        fetcher(parameters),
        # Map the source's column names onto our canonical field names.
        concatenate(dict(
            page_title=['Title'],
            publication_id=['ItemId'],
            tender_id=['ItemUniqueId'],
            publisher=['OfficeDesc'],
            start_date=['PublishDate'],
            claim_date=['LastDate'],
            decision=['StatusDesc'],
            description=['Description'],
            last_update_date=['UpdateDate'],
            base_url=['BaseUrl'],
            url_name=['UrlName'],
            tender_type_he=['PublicationTypeDesc'],
        ), resources=-1),
        add_field('tender_type', 'string', default=parameters['tender_type'], resources=-1),
        take_first('publisher'),
        take_first('tender_type_he'),
        # Absolute gov.il URL assembled from the source's base_url + url_name.
        add_field('page_url', 'string',
                  default=lambda row: 'https://www.gov.il/he{base_url}{url_name}'.format(**row)),
        # delete_fields(['base_url', 'url_name']),
        filter_rows(approve(parameters)),
        set_type('publication_id', type='integer'),
        set_type('start_date', type='datetime', format=DATE_FMT),
        set_type('last_update_date', type='datetime', format=DATE_FMT),
        set_type('claim_date', type='datetime', format=DATE_FMT),
        # claim_date deliberately stays a datetime; only these two become dates.
        datetime_to_date('last_update_date'),
        datetime_to_date('start_date'),
        set_primary_key(['publication_id', 'tender_type', 'tender_id']),
        dedup(),
        update_resource(-1, **parameters.pop('resource')),
        update_resource(-1, **{'dpp:streaming': True}),
        validate(),
        # printer(),
        # lambda rows: (row for row in rows if row['tender_id'].endswith('73f3')),
    )
skip_rows=[1, 2, 3, 4, 5, -1], headers=['Date', 'Price', 'Empty column'], format='csv', name='annual' ), extract_december_rows, load( load_source='http://www.bundesbank.de/cae/servlet/StatisticDownload?tsId=BBEX3.M.XAU.USD.EA.AC.C06&its_csvFormat=en&its_fileFormat=csv&mode=its', skip_rows=[1, 2, 3, 4, 5, -1], headers=['Date', 'Price', 'Empty column'], format='csv', name='monthly' ), update_resource('monthly', **{'path':'data/monthly.csv', 'dpp:streaming': True}), update_resource('annual', **{'path':'data/annual.csv', 'dpp:streaming': True}), set_type('Date', resources='annual', type='yearmonth'), set_type('Price', resources='annual', type='number'), set_type('Date', resources='monthly', type='yearmonth'), set_type('Price', resources='monthly', type='number'), validate(), delete_fields(['Empty column'], resources=None) ) def flow(parameters, datapackage, resources, stats): return gold_price_flow if __name__ == '__main__': gold_price_flow.process()
verify_unused_fields(), DF.concatenate(FIELD_MAPPING, target=dict(name='out')), fix_urls(['source_url']), ensure_chart_title(), fix_languages(), DF.add_field('order_index', 'integer'), lambda rows: ({ **row, **{ 'order_index': i } } for i, row in enumerate(rows)), set_defaults, extrapulate_years, fix_values, DF.set_type('value', groupChar=',', bareNumber=True), fix_units, DF.set_type('extrapulation_years', type='array', **{'es:itemType': 'string'}), DF.validate(), DF.add_computed_field([ dict(target=dict(name='life_areas', type='array', **{ 'es:itemType': 'string', 'es:keyword': True }), operation=lambda row: [ x for x in [row.get('life_area{}'.format(i)) for i in range(1, 4)]
def data_pull_csv():
    """Pull the Johns Hopkins COVID-19 time-series CSVs and merge them.

    Unpivots the per-date columns into (Date, Case) rows, joins Confirmed
    and Recovered counts onto the Deaths resource, and dumps the combined
    resource to RAW_OUTPUT_CSV.
    """
    # Every column whose header looks like a date becomes a (Date, Case) pair.
    date_columns = [{
        'name': r'([0-9]+\/[0-9]+\/[0-9]+)',
        'keys': {'Date': r'\1'},
    }]
    key_fields = [{'name': 'Date', 'type': 'string'}]
    value_field = {'name': 'Case', 'type': 'number'}

    pipeline = Flow(
        load(f'{BASE_URL}{CONFIRMED}'),
        load(f'{BASE_URL}{RECOVERED}'),
        load(f'{BASE_URL}{DEATH}'),
        unpivot(date_columns, key_fields, value_field),
        # Normalize date separators: m/d/y -> m-d-y.
        find_replace([{'name': 'Date', 'patterns': [{'find': '/', 'replace': '-'}]}]),
        to_normal_date,
        set_type('Date', type='date', format='%d-%m-%y', resources=None),
        set_type('Case', type='number', resources=None),
        # Fold the Confirmed counts into the Deaths resource.
        join(
            source_name='time_series_19-covid-Confirmed',
            source_key=['Province/State', 'Country/Region', 'Date'],
            source_delete=True,
            target_name='time_series_19-covid-Deaths',
            target_key=['Province/State', 'Country/Region', 'Date'],
            fields={'Confirmed': {'name': 'Case', 'aggregate': 'first'}},
        ),
        # Fold the Recovered counts in as well.
        join(
            source_name='time_series_19-covid-Recovered',
            source_key=['Province/State', 'Country/Region', 'Date'],
            source_delete=True,
            target_name='time_series_19-covid-Deaths',
            target_key=['Province/State', 'Country/Region', 'Date'],
            fields={'Recovered': {'name': 'Case', 'aggregate': 'first'}},
        ),
        # Rename the remaining 'Case' column (deaths) to 'Deaths'.
        add_computed_field(
            target={'name': 'Deaths', 'type': 'number'},
            operation='format',
            with_='{Case}',
        ),
        delete_fields(['Case']),
        update_resource(
            'time_series_19-covid-Deaths',
            name='time-series-19-covid-combined',
            path=RAW_OUTPUT_CSV,
        ),
        dump_to_path(),
    )
    # Execute the pipeline; the in-memory result is discarded (side effect
    # of dump_to_path is what matters).
    pipeline.results()[0]
def flow(self):
    """Build a Flow that snapshots per-source configuration rows into SQL.

    Derives a human-readable snippet format string from the taxonomy's
    key fields, collapses the working resource down to one row per
    '_source', and upserts the result into the 'configurations' table.
    """
    taxonomy = self.context.taxonomy
    txn_config = taxonomy.config
    # Build a format template like: '<title> for: <Field>: "{field}",  ...'
    fmt_str = [taxonomy.title + ' for:']
    fields = txn_config['key-fields']
    for f in fields:
        for ct in taxonomy.column_types:
            if ct['name'] == f:
                # ':' is not legal inside a field placeholder, hence the '-'.
                fmt_str.append(
                    '%s: "{%s}",' % (ct['title'], f.replace(':', '-'))
                )
                break
    fmt_str = ' '.join(fmt_str)
    # Re-key the field names the same way ('-' for ':') for use as columns.
    fields = [
        ct.replace(':', '-')
        for ct in fields
    ]
    all_fields = ['_source'] + fields
    TARGET = 'configurations'
    # Persist the current config, but force publishing off in the snapshot.
    saved_config = self.config._unflatten()
    saved_config.setdefault('publish', {})['allowed'] = False
    return Flow(
        duplicate(RESOURCE_NAME, TARGET),
        # Deduplicate on (_source + key fields).
        join_with_self(
            TARGET,
            all_fields,
            dict((f, {}) for f in all_fields),
        ),
        add_computed_field(
            [
                dict(
                    operation='format', target='snippets', with_=fmt_str
                ),
                dict(
                    operation='constant', target='key_values', with_=None
                ),
            ],
            resources=TARGET
        ),
        add_field('config', 'object', saved_config, resources=TARGET),
        add_field('fields', type='object', default=self.collate_values(fields), resources=TARGET),
        # Collapse to one row per source, aggregating snippets/key_values.
        join_with_self(
            TARGET,
            ['_source'],
            dict(
                source=dict(name='_source'),
                config={},
                key_values=dict(aggregate='array'),
                snippets=dict(aggregate='array'),
            )
        ),
        set_type('source', type='string'),
        set_type('config', type='object'),
        set_type('key_values', type='array'),
        set_type('snippets', type='array'),
        set_primary_key(['source']),
        # 'update' mode: upsert rows keyed on the primary key.
        dump_to_sql(
            dict([
                (TARGET, {
                    'resource-name': TARGET,
                    'mode': 'update'
                })
            ]),
            engine=self.lazy_engine(),
        ),
    )
'name': 'serie-a', 'path': 'italym.php', 'key': 'I1', 'links': [], 'dataset-name': 'italian-serie-a', 'dataset-title': 'Italian Serie A (football)' }, { 'name': 'ligue-1', 'path': 'francem.php', 'key': 'F1', 'links': [], 'dataset-name': 'french-ligue-1', 'dataset-title': 'French Ligue 1 (football)' }] for league in leagues: meta = get_league_meta(league) processors = get_processors(meta) processors.append(set_type('Date', type='date', format='%d/%m/%y')), processors.append(dump_to_path(out_path='datasets/' + league['name'])) processors.append(printer()) processors = [ add_metadata(name=league['dataset-name'], title=league['dataset-title'], licenses=licenses, sources=sources, related=related_datasets, readme=readme % league['dataset-title'].replace(' (football)', '')) ] + processors Flow(*processors).process()
def Olap_Datapackage():
    """Build an OLAP-style datapackage: a fact table plus dimension tables.

    Concatenates the elspot/afrr/fcr source datapackages into one 'fact'
    resource, assigns a surrogate integer 'id' primary key, then derives
    'time', 'area' and 'product' dimension tables by duplicating and
    de-duplicating the fact resource.  The result is dumped to
    'olap_datapackage'.
    """
    # All source timestamps share this fixed UTC layout.
    ts_format = '%Y-%m-%dT%H:%M:%S+00:00'

    def ts_component(fmt):
        # Return an operation extracting one strftime() component of the
        # row's Timestamp (previously this parse was copy-pasted 4 times).
        return lambda row: datetime.strptime(row['Timestamp'], ts_format).strftime(fmt)

    flow = Flow(
        # Load datapackages:
        load('elspot_prices_data/datapackage.json'),
        load('afrr_data/datapackage.json'),
        load('fcr_dk1_data/datapackage.json'),
        concatenate(fields={
            'Timestamp': ['HourUTC'],
            'Area': ['PriceArea'],
            'Product': ['product'],
            'Amount': ['amount'],
            'Price_DKK': ['PriceDKK'],
            'Price_EUR': ['PriceEUR']
        }, target={'name': 'fact', 'path': 'data/fact.csv'}),
        # Seed an 'id' column; add_id fills in the real running number.
        add_computed_field(
            [dict(target='id', operation='constant', with_='dummy')]),
        add_id,
        set_type('id', type='integer'),
        set_primary_key(primary_key=['id']),
        # Reorder so that 'id' column is the first:
        select_fields([
            'id', 'Timestamp', 'Area', 'Product', 'Amount', 'Price_DKK',
            'Price_EUR'
        ], resources='fact'),
        # Add foreign keys:
        add_foreign_keys,
        # Fact table is ready. Now duplicate the resource to generate dim tables:
        # First is 'time' table:
        duplicate(source='fact', target_name='time', target_path='time.csv'),
        select_fields(['Timestamp'], resources=['time']),
        join_self(source_name='time', source_key=['Timestamp'],
                  target_name='time', fields={'Timestamp': {}}),
        # Parse datetime fields and add a separate field for year, month and day:
        add_computed_field([
            dict(target=dict(name='day', type='string'),
                 operation=ts_component('%d')),
            dict(target=dict(name='month', type='string'),
                 operation=ts_component('%m')),
            dict(target=dict(name='month_name', type='string'),
                 operation=ts_component('%B')),
            dict(target=dict(name='year', type='year'),
                 operation=ts_component('%Y')),
        ], resources=['time']),
        set_primary_key(primary_key=['Timestamp'], resources=['time']),
        # Now 'area' table:
        duplicate(source='fact', target_name='area', target_path='area.csv'),
        select_fields(['Area'], resources=['area']),
        join_self(source_name='area', source_key=['Area'],
                  target_name='area', fields={'Area': {}}),
        set_primary_key(primary_key=['Area'], resources=['area']),
        # Now 'product' table:
        duplicate(source='fact', target_name='product',
                  target_path='product.csv'),
        select_fields(['Product'], resources=['product']),
        join_self(source_name='product', source_key=['Product'],
                  target_name='product', fields={'Product': {}}),
        set_primary_key(primary_key=['Product'], resources=['product']),
        dump_to_path('olap_datapackage'))
    flow.process()
def main_flow(prefix=''):
    """Build the publications-for-Elasticsearch Flow.

    Loads the source publications datapackage plus the Zotero datapackage,
    concatenates them into a single 'publications' resource, normalizes
    keyword/translated fields and computes doc_id / page_title.
    ``prefix`` is prepended to the source datapackage URL (e.g. a base path).
    """
    source_url = '{}data/publications_for_es/datapackage.json'.format(prefix)
    package = Package(source_url)
    # Collect every field name appearing in any resource of the package...
    all_fields = set(
        field.name
        for resource in package.resources
        for field in resource.schema.fields
    )
    # ...and turn it into a concatenate() mapping (no source-name aliases).
    all_fields = dict((field_name, []) for field_name in all_fields)
    return Flow(
        load(source_url),
        # NOTE(review): dataflows ignores a row processor's return value, so
        # this lambda (which builds a new dict instead of mutating `row`)
        # looks like a no-op — verify the 'json' field is actually added
        # before delete_fields(['json']) below relies on it.
        lambda row: dict(row, json='{}'),
        concatenate(all_fields, target=dict(name='publications', path='publications.csv')),
        delete_fields(['json']),
        # Prefer the Google-Drive ('gd_') variant of these fields when present.
        prefer_gd('title'),
        prefer_gd('notes'),
        prefer_gd('publisher'),
        prefer_gd('tags'),
        prefer_gd('language_code'),
        prefer_gd('pubyear'),
        split_keyword_list('item_kind', 'gd_Item Type'),
        split_keyword_list('life_areas', 'gd_Life Domains'),
        split_keyword_list('source_kind', 'gd_Resource Type'),
        split_keyword_list('languages', 'language_code', ' '),
        split_keyword_list('tags', 'tags'),
        load('data/zotero/datapackage.json'),
        # Merge the Zotero rows into the same 'publications' resource.
        concatenate(dict(
            title=[],
            pubyear=[],
            publisher=[],
            authors=[],
            life_areas=[],
            notes=[],
            languages=[],
            tags=[],
            url=[],
            migdar_id=[],
            item_kind=[],
            source_kind=[],
            isbn=[],
            physical_description=[],
            publication_distribution_details=[],
            doc_id=[],
        ), target=dict(name='publications', path='publications.csv')),
        # 'es:*' keys are Elasticsearch mapping hints consumed downstream.
        set_type('title', **{'es:title': True}),
        set_type('notes', **{'es:hebrew': True}),
        set_type('publisher', **{'es:keyword': True}),
        add_field('year', 'integer', default=extract_year),
        split_and_translate('tags', 'tags', keyword=True),
        split_and_translate('life_areas', 'life_areas', keyword=True),
        split_and_translate('languages', 'languages', keyword=True),
        split_and_translate('source_kind', 'source_kind', keyword=True),
        split_and_translate('item_kind', 'item_kind', keyword=True),
        printer(),
        add_computed_field([
            {
                'operation': 'format',
                'target': 'doc_id',
                'with': KEY_PATTERN
            },
            {
                'operation': 'format',
                'target': 'page_title',
                'with': PAGE_TITLE_PATTERN
            },
        ]),
        # NOTE(review): empty add_computed_field appears to be a leftover
        # no-op — candidate for removal.
        add_computed_field([]),
    )
'Year', 'patterns': [{ 'find': '(\s?\(\d+\))|(\.0)', 'replace': '' }] }, { 'name': 'Fourth', 'patterns': [{ 'find': '\+|', 'replace': '' }] }], resources=0), update_resource( 0, **{ 'name': 'household-income-us-historical', 'path': 'data/household-income-us-historical.csv', 'dpp:streaming': True }), set_type('Year', type='year'), set_type('^(?!Y).+', type='number'), validate()) def flow(parameters, datapackage, resources, stats): return household_us if __name__ == '__main__': household_us.process()
load_source="http://www.eia.gov/dnav/pet/hist_xls/RWTCw.xls", format="xls", sheet=2, skip_rows=[1, 2, 3], headers=["Date", "Price"], ), load( load_source="http://www.eia.gov/dnav/pet/hist_xls/RWTCm.xls", format="xls", sheet=2, skip_rows=[1, 2, 3], headers=["Date", "Price"], ), load( load_source="http://www.eia.gov/dnav/pet/hist_xls/RWTCa.xls", format="xls", sheet=2, skip_rows=[1, 2, 3], headers=["Date", "Price"], ), rename_resources, set_type("Date", resources=None, type="date", format="any"), validate(), printer(), filter_out_empty_rows, dump_to_path(), ) if __name__ == "__main__": OIL_PRICES.process()
), load( load_source='http://www.bankofengland.co.uk/boeapps/iadb/fromshowcolumns.asp?csv.x=yes&SeriesCodes=IUQAMNPY&UsingCodes=Y&CSVF=TN&Datefrom=01/Jan/1963', skip_rows=[1], headers=['Date', 'Rate'], format='csv', name='quarterly' ), load( load_source='http://www.bankofengland.co.uk/boeapps/iadb/fromshowcolumns.asp?csv.x=yes&SeriesCodes=IUAAMNPY&UsingCodes=Y&CSVF=TN&Datefrom=01/Jan/1963', skip_rows=[1], headers=['Year', 'Rate'], format='csv', name='annual' ), set_type('Date', resources='quarterly', type='date', format='any'), set_type('Rate', resources='quarterly', type='number', description='Quarterly average yield from British Government Securities, 10 year Nominal Par Yield'), set_type('Year', resources='annual', type='date', format='any'), set_type('Rate', resources='annual', type='number', description='Annual average yield from British Government Securities, 10 year Nominal Par Yield'), update_resource('quarterly', **{'path':'data/quarterly.csv', 'dpp:streaming': True}), update_resource('annual', **{'path':'data/annual.csv', 'dpp:streaming': True}), validate(), dump_to_path() ) def flow(parameters, datapackage, resources, stats): return bond_uk if __name__ == '__main__': bond_uk.process()
def test_example_4():
    """Smoke-test set_type() with a thousands groupChar on sample data."""
    from dataflows import Flow, set_type

    pipeline = Flow(
        country_population(),
        set_type('population', type='number', groupChar=','),
    )
    data, dp, _ = pipeline.results()
} ], version="0.2.0", views=[ { "name": "graph", "title": "VIX - CBOE Volatility Index", "specType": "simple", "spec": {"type": "line","group": "Date","series": ["VIX Close"]} } ], readme=readme() ), load( load_source='http://www.cboe.com/publish/ScheduledTask/MktData/datahouse/vixcurrent.csv', headers=2, name='vix-daily' ), set_type('Date', type='date', format='any'), update_resource('vix-daily', **{'title': 'VIX Daily', 'path':'data/vix-daily.csv', 'dpp:streaming': True}), validate() ) def flow(parameters, datapackage, resources, stats): return finance_vix if __name__ == '__main__': finance_vix.process()
def base_flow():
    """Build the publications indexing Flow from Google-Drive spreadsheets.

    First runs an inner Flow that lists and downloads the .xlsx files from
    Google Drive and discovers their sheets; then builds (and returns) the
    main Flow that loads every discovered sheet plus the Zotero CSV,
    concatenates them into a 'publications' resource and normalizes fields
    for Elasticsearch indexing.
    """
    # Inner Flow: materialize the list of (filename, sheet, headers) sources.
    sources, *_ = Flow(
        list_gdrive(),
        # Keep only real files that are .xlsx spreadsheets.
        filter_rows(lambda row: (
            row['kind'] == 'drive#file' and
            row['mimeType'] == 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
        )),
        # Local cache path is keyed by modification time + file id.
        add_field('filename', 'string',
                  default=lambda row: 'pubfiles/{modifiedTime}-{id}.xlsx'.format(**row)),
        download_files(),
        add_field('sheet', 'string'),
        add_field('headers', 'integer', 1),
        get_sheets(),
    ).results()
    return Flow(
        # Load every discovered sheet as strings (no type inference).
        *[
            load(source['filename'],
                 sheet=source['sheet'],
                 headers=source['headers'],
                 infer_strategy=load.INFER_STRINGS,
                 cast_strategy=load.CAST_TO_STRINGS,
                 name=source['filename'])
            for source in sources[0]
        ],
        # Drop rows with no usable migdar_id (empty, literal 'None', or None).
        filter_rows(lambda row: row.get('migdar_id') not in ('', 'None', None)),
        load('data/zotero/zotero.csv'),
        # Map the many source column spellings onto canonical field names.
        concatenate(
            fields={
                'migdar_id': [],
                'title': ['Title', ],
                'bib_title': [],
                'bib_related_parts': [],
                'notes': [],
                'tags': ['Tags'],
                'publisher': [],
                'languages': ['language_code'],
                'item_kind': ['Item Type', 'Item type', 'item_type'],
                'pubyear': ['pubyear/pubdate'],
                'life_areas': ['Life Domains', 'Domain'],
                'source_kind': ['Resource Type', 'Resource type'],
                'authors': ['author'],
                'url': ['URL'],
            },
            target=dict(
                name='publications',
                path='data/publications.csv'
            )
        ),
        fix_nones(),
        fix_urls(['url']),
        # 'es:*' keys are Elasticsearch mapping hints consumed downstream.
        set_type('title', **{'es:title': True}),
        set_type('authors', **{'es:boost': True}),
        set_type('notes', **{'es:hebrew': True}),
        set_type('publisher', **{'es:boost': True}),
        add_field('year', 'integer', default=extract_year),
        split_and_translate('tags', 'tags', keyword=True, delimiter=','),
        split_and_translate('life_areas', 'life_areas', keyword=True, delimiter=','),
        split_and_translate('languages', 'languages', keyword=True, delimiter=' '),
        split_and_translate('source_kind', 'source_kind', keyword=True),
        split_and_translate('item_kind', 'item_kind', keyword=True),
        fix_links('notes'),
        verify_migdar_id(),
        add_computed_field([
            {'operation': 'format', 'target': 'doc_id', 'with': KEY_PATTERN},
            {'operation': 'format', 'target': 'page_title', 'with':
             PAGE_TITLE_PATTERN},
        ]),
        # Keyword copy of the title for exact-match queries.
        add_field('title_kw', 'string',
                  default=lambda row: row.get('title'),
                  **{'es:keyword': True}),
    )
{ "name": "graph", "title": "10 year US Government Bond Yields (Monthly granuarlity)", "specType": "simple", "spec": {"type": "line","group": "Date","series": ["Rate"]} } ], readme=readme() ), load( load_source='http://www.federalreserve.gov/datadownload/Output.aspx?rel=H15&series=0809abf197c17f1ff0b2180fe7015cc3&lastObs=&from=&to=&filetype=csv&label=include&layout=seriescolumn', skip_rows=[i+1 for i in range(6)], headers=['Date', 'Rate'], format='csv', name='monthly' ), set_type('Date', type='date', format='any', descriptor='Date in ISO 8601'), set_type('Rate', type='number', description='Percent per year'), update_resource('monthly', **{'path':'data/monthly.csv', 'dpp:streaming': True}), validate(), dump_to_path() ) def flow(parameters, datapackage, resources, stats): return bond_us if __name__ == '__main__': bond_us.process()
{% if 'filter' in processing %} filter_rows(), {% endif %} {% if 'find_replace' in processing %} find_replace([ dict(name='field_name', patterns=[ dict(find='re-pattern-to-find', replace='re-pattern-to-replace-with'), ]) ]), {% endif %} {% if 'delete_fields' in processing %} delete_fields(['field_name']), # Pass a list of field names to delete from the data {% endif %} {% if 'set_type' in processing %} set_type('field_name', type='number', constraints=dict(minimum=3)), # There are quite a few options you can use here # Take a look at https://frictionlessdata.io/specs/table-schema/ # Or you can simply use validate() here instead {% endif %} {% if 'unpivot' in processing %} unpivot(unpivot_fields, extra_keys, extra_value), # See documentation on the meaning of each of these parameters {% endif %} {% if 'custom' in processing %} my_custom_processing, {% endif %} # Save the results add_metadata(name='{{slug}}', title='''{{title}}'''), {% if output in ('print', 'print_n_pkg') %} printer(), {% endif %} {% if output == 'list' %}