def test_http_error_table_list():
    """Requesting the table list from 'test.cbs.nl' must raise a ConnectionError.

    Any other exception type propagates unchanged and fails the test as an
    error rather than as an assertion failure.
    """
    connection_failed = False
    try:
        cbsodata.get_table_list(catalog_url='test.cbs.nl')
    except requests.ConnectionError:
        connection_failed = True

    # The call returning normally means the expected error never happened.
    assert connection_failed
def test_get_table_list_derden(table_id):
    """Check that all three ways of selecting a catalog give the same table list.

    The 'dataderden.cbs.nl' catalog is selected through the global option,
    through the ``cbsodata.catalog`` context manager and through the
    ``catalog_url`` argument; the first record of each result must agree on
    every key.

    Args:
        table_id: not used in the body; presumably supplied by the test
            parametrization or a fixture -- confirm against the test setup.
    """
    # option 1
    print("global")
    cbsodata.options.catalog_url = 'dataderden.cbs.nl'
    try:
        data_option1 = cbsodata.get_table_list()
    finally:
        # Restore the global option even when the request fails, so the
        # changed catalog does not leak into the tests that run afterwards.
        cbsodata.options.catalog_url = 'opendata.cbs.nl'

    # option 2
    print("context")
    with cbsodata.catalog('dataderden.cbs.nl'):
        data_option2 = cbsodata.get_table_list()

    # option 3
    print("argument")
    data_option3 = cbsodata.get_table_list(catalog_url='dataderden.cbs.nl')

    assert len(data_option1[0].keys()) > 0

    for key in data_option1[0].keys():
        assert data_option1[0][key] == \
            data_option2[0][key] == data_option3[0][key]
""" Name : c3_15_cbsodata_list_of_data.py Book : Hands-on Data Science with Anaconda) Publisher: Packt Publishing Ltd. Author : Yuxing Yan and James Yan Date : 1/15/2018 email : [email protected] [email protected] """ import cbsodata as cb list=cb.get_table_list() print(list)
# Zipped shapefiles with the 2013 "buurt" and "wijk" maps.
bw_zip_url = 'http://download.cbs.nl/regionale-kaarten/shape-2013-versie-3-0.zip'
bw_file_name = input_path + 'buurt_wijk_kaart_2013/buurt_wijk_kaart_2013.zip'

# Download the archive only when there is no local copy yet.
if not os.path.isfile(bw_file_name):
    print('Downloading buurt en wijk kaarten')
    r = requests.get(bw_zip_url)
    # Stop on an HTTP error instead of saving the error page as the .zip file.
    r.raise_for_status()
    with open(bw_file_name, 'wb') as f:
        f.write(r.content)

bw_shp = gpd.read_file('zip:///' + bw_file_name + '!uitvoer_shape/buurt_2013.shp')
# for future reference: read straight from url
# http://andrewgaidus.com/Reading_Zipped_Shapefiles/

# data on regional division 2013
cbs_tables = cbsodata.get_table_list()


def find_identifier(cbs_tables, search_list):
    """Return the 'Identifier' of a table whose 'Title' contains all search terms.

    Args:
        cbs_tables: iterable of table records that can be indexed with
            'Title' and 'Identifier'.
        search_list: substrings that must all occur in the title.

    Returns:
        The identifier of the first matching table, or None when no title
        contains every search term.
    """
    # NOTE(review): the indentation of the original `return` was lost; this
    # returns on the first match -- confirm that first-match is intended.
    for tbl in cbs_tables:
        if all(x in tbl['Title'] for x in search_list):
            return tbl['Identifier']
    return None


gebieden_2013_id = find_identifier(cbs_tables, ['Gebieden', '2013'])
gebieden_2013 = pd.DataFrame(cbsodata.get_data(gebieden_2013_id))

''' Data wrangling '''
""" Voorbeelden gebruik van CBS Open Data v3 in Python https://www.cbs.nl/nl-nl/onze-diensten/open-data Auteur: Jolien Oomens Centraal Bureau voor de Statistiek Minimale voorbeelden van het ophalen van een tabel, het koppelen van metadata en het filteren van data voor het downloaden. """ import pandas as pd import cbsodata # Downloaden van tabeloverzicht toc = pd.DataFrame(cbsodata.get_table_list()) # Downloaden van gehele tabel (kan een halve minuut duren) data = pd.DataFrame(cbsodata.get_data('83765NED')) print(data.head()) # Downloaden van metadata metadata = pd.DataFrame(cbsodata.get_meta('83765NED', 'DataProperties')) print(metadata[['Key', 'Title']]) # Downloaden van selectie van data data = pd.DataFrame( cbsodata.get_data('83765NED', filters="WijkenEnBuurten eq 'GM0363 '", select=['WijkenEnBuurten', 'AantalInwoners_5'])) print(data.head())
def test_table_list():
    """The table list returned with default settings has more than 100 entries."""
    tables = cbsodata.get_table_list()
    assert len(tables) > 100
def main():
    """Entry point of the ``cbsodata`` command line interface.

    The first command line argument selects the subcommand:

    * ``data`` -- fetch a table with ``cbsodata.get_data``.
    * ``info`` -- fetch table information with ``cbsodata.get_info``.
    * ``list`` -- fetch the table list with ``cbsodata.get_table_list``.

    Each subcommand builds its own parser for the remaining arguments,
    writes the result to the output file when one was given, and then hands
    the result to the text or JSON output. Any other invocation is parsed by
    a generic parser that shows the help when no subcommand was given.
    """
    subcommand = sys.argv[1] if len(sys.argv) > 1 else None

    if subcommand == "data":
        cli = argparse.ArgumentParser(
            prog="cbsodata",
            description=(
                " CBS Open Data: Command Line Interface "
                "Get data by table identifier. "
            ),
        )
        parse_argument_table_id(cli)
        parse_argument_catalog(cli)
        parse_argument_output_format(cli)
        parse_argument_max_rows(cli)
        parse_argument_output(cli)
        opts = cli.parse_args(sys.argv[2:])

        rows = cbsodata.get_data(opts.table_id, catalog_url=opts.catalog_url)

        if opts.output_file:
            save_list_to_json(rows, opts.output_file)

        if opts.output_format == "text":
            text_outputter(rows, max_rows=opts.max_rows)
        else:
            json_outputter(rows, max_rows=opts.max_rows)
        return

    if subcommand == "info":
        cli = argparse.ArgumentParser(
            prog="cbsodata",
            description=(
                " CBS Open Data: Command Line Interface "
                "Get data infomation by table identifier. "
            ),
        )
        parse_argument_table_id(cli)
        parse_argument_catalog(cli)
        parse_argument_output_format(cli)
        parse_argument_output(cli)
        opts = cli.parse_args(sys.argv[2:])

        info = cbsodata.get_info(opts.table_id, catalog_url=opts.catalog_url)

        if opts.output_file:
            with open(opts.output_file, 'w') as f:
                json.dump(info, f, indent=4)

        if opts.output_format == "text":
            # Present the mapping as label/value rows for the text output.
            label_rows = [
                {"Label": k, "Value": v}
                for k, v in info.items()
            ]
            text_outputter(label_rows)
        else:
            print(json.dumps(info, indent=4))
        return

    if subcommand == "list":
        cli = argparse.ArgumentParser(
            prog="cbsodata",
            description=(
                " CBS Open Data: Command Line Interface "
                "Get list of available tables. \n"
            ),
        )
        parse_argument_catalog(cli)
        parse_argument_output_format(cli)
        parse_argument_max_rows(cli)
        parse_argument_output(cli)
        opts = cli.parse_args(sys.argv[2:])

        tables = cbsodata.get_table_list(catalog_url=opts.catalog_url)

        if opts.output_file:
            save_list_to_json(tables, opts.output_file)

        if opts.output_format == "text":
            text_outputter(tables, max_rows=opts.max_rows)
        else:
            json_outputter(tables, max_rows=opts.max_rows)
        return

    # no valid sub command
    cli = argparse.ArgumentParser(
        prog="cbsodata",
        description=" CBS Open Data: Command Line Interface ",
    )
    cli.add_argument(
        "subcommand",
        nargs="?",
        type=lambda x: isinstance(x, str) and x in AVAILABLE_CMDS,
        help="the subcommand (one of '{}')".format(
            "', '".join(AVAILABLE_CMDS)),
    )
    cli.add_argument(
        "--version",
        action='store_true',
        help="show the package version",
    )
    opts = cli.parse_args()

    if opts.subcommand is None:
        cli.print_help()