Esempio n. 1
0
def test_http_error(table_id):

    try:
        cbsodata.get_data(table_id)
    except requests.HTTPError:
        assert True
    else:
        assert False
Esempio n. 2
0
def test_select_list():

    default_sel_filt = cbsodata.get_info('82070ENG')['DefaultSelection']
    filters_and_selections = default_sel_filt.split("&")

    for fs in filters_and_selections:
        if fs.startswith('$select='):
            select = fs[8:]

    cbsodata.get_data('82070ENG', select=select.split(', '))
Esempio n. 3
0
def test_filters():

    default_sel_filt = cbsodata.get_info('82070ENG')['DefaultSelection']
    filters_and_selections = default_sel_filt.split("&")

    for fs in filters_and_selections:
        if fs.startswith('$filter='):
            filt = fs[8:]

    cbsodata.get_data('82070ENG', filters=filt)
Esempio n. 4
0
async def get_data(request: Request):
    write_logging(request, 'CBS-API ')
    data = pd.DataFrame(cbsodata.get_data('84286NED'))
    data = data[cbscolumns]
    data = data[data['Codering_3'].str.contains('|'.join(gemeenteselectie))]
    data = data.fillna(0)
    return data.to_dict(orient='index')
Esempio n. 5
0
def get_population_per_ggd_region():
    # Table 84721NED gives information about regions within the Netherlands
    # See: https://www.cbs.nl/nl-nl/cijfers/detail/84721NED
    cbs_data = cbsodata.get_data('84721NED',
                                 select=['RegioS', 'Code_14', 'Inwonertal_52'])
    cbs_df = pd.DataFrame(cbs_data).groupby("Code_14").sum()
    cbs_df.index = cbs_df.index.str.strip()
    return cbs_df.to_dict()["Inwonertal_52"]
Esempio n. 6
0
def process_data_gezondheidsmonitor(CBS_codering, name):
    data = cbsodata.get_data(CBS_codering)
    buurten2016 = cbsodata.get_data('83487NED')
    translate2016 = {}
    for row in buurten2016:
        if row['SoortRegio_2'].rstrip() == 'Gemeente':
            translate2016[row['WijkenEnBuurten']] = row['Codering_3']

    collecting = {}
    for row in data:
        try:
            identifier = int('1' + translate2016[row['RegioS']][2:])
            collecting[identifier] = row
        except:
            pass
    df = pd.DataFrame.from_dict(collecting, orient='index', dtype=None)
    df.rename(columns=translator, inplace=True)
    return df
Esempio n. 7
0
def load_cbs_table(table_id, typed_data_set=False):
    '''
	Create a Postgres table with the required structure,
	downloads the data from CBS, and loads the data into
	the Postgres table.
	'''
    try:
        table_name = get_sanitized_cbs_table_title(table_id, typed_data_set)
    except ConnectionError as e:
        print(
            f'\tERROR! Could not load CBS table due to a ConnectionError. Are you connected to the internet? Error:\n\t{e}'
        )
        # do not continue processing
        return

    print(f'Loading CBS table {table_name}')

    # Do not load data
    if table_exists(table_name):
        print('Table already exists, skipping downloading and loading.')

    else:
        print('Creating table...')
        create_table_for_cbs_table(table_id, table_name)

        print('Downloading data...')
        if typed_data_set:
            data = cbsodata.get_meta(table_id, 'TypedDataSet')
        else:
            data = cbsodata.get_data(table_id)

        print('Inserting data...')
        sanitize_key_dict = get_sanitize_key_dict(table_id)
        connection = get_connection()
        cursor = connection.cursor()

        for row in data:
            # Before we can insert the data, we have to manipulate
            # the key so it matches the columns in the created
            # Postgres table, and we might need to trim the white space
            # from the values.
            row_dict = {
                sanitize_key_dict[key]: sanitize_data(value)
                for key, value in row.items()
            }
            # TODO: it's probably more efficient to just pass a list,
            # I think the required keys are always there. And then we can
            # more easily batch these INSERTs.
            insert_dict(table_name, row_dict, cursor)

        cursor.close()
        connection.commit()
        connection.close()

    add_indices(table_name)
    print()
Esempio n. 8
0
def process_data_sociale_voorzieningen(CBS_codering, name):
    data = cbsodata.get_data(CBS_codering)
    collecting = {}
    for row in reversed(data):
        if len(row['Perioden']) != 4 or len(row['Codering_3'].rstrip()) == 0:
            continue
        identifier = int('1' + row['Codering_3'][2:])
        collecting[identifier] = row
    df = pd.DataFrame.from_dict(collecting, orient='index', dtype=None)
    df.rename(columns=translator, inplace=True)
    return df
Esempio n. 9
0
def process_data_buurten(CBS_codering, name):
    data = cbsodata.get_data(CBS_codering)
    collecting = {}
    year = int(''.join([x for x in name if x.isdigit()]))
    for row in reversed(data):
        collecting[int('1' + row['Codering_3'][2:])] = row
        row['Perioden'] = year
        row['Gemeentenaam_1'] = row['Gemeentenaam_1'].strip()
        row['SoortRegio_2'] = row['SoortRegio_2'].strip()
    df = pd.DataFrame.from_dict(collecting, orient='index', dtype=None)
    df.rename(columns=translator, inplace=True)
    df = absolute_values_BeW(df)
    return df
Esempio n. 10
0
def test_get_data_derden(table_id):

    # option 1
    print("global")
    cbsodata.options.catalog_url = 'dataderden.cbs.nl'
    data_option1 = cbsodata.get_data(table_id)
    cbsodata.options.catalog_url = 'opendata.cbs.nl'

    # option 2
    print("context")
    with cbsodata.catalog('dataderden.cbs.nl'):
        data_option2 = cbsodata.get_data(table_id)

    # option 3
    print("argument")
    data_option3 = cbsodata.get_data(table_id, catalog_url='dataderden.cbs.nl')

    assert len(data_option1[0].keys()) > 0

    for key in data_option1[0].keys():

        assert data_option1[0][key] == \
            data_option2[0][key] == data_option3[0][key]
Esempio n. 11
0
 def get_config(self, dir_config):
     """Get config"""
     if dir_config:
         if isinstance(dir_config, str):
             dir_config = Path(dir_config)
         dir_config.mkdir(exist_ok=True)
         path_area_codes = dir_config / 'area_codes.csv'
         path_cbs = dir_config / 'cbs.csv'
     try:
         area_codes = pd.read_csv(path_area_codes, dtype='object')
     except FileNotFoundError:
         area_codes = self.get_area_codes()
         if dir_config:
             area_codes.to_csv(path_area_codes, index=False)
     for code, place in zip(area_codes.code, area_codes.place):
         self.replace[code] = place
     try:
         cbs = pd.read_csv(path_cbs)
     except FileNotFoundError:
         cbs = pd.DataFrame(cbsodata.get_data(self.cbs_table))
         for c in cbs.columns:
             if cbs[c].dtype == 'int64':
                 continue
             cbs[c] = cbs[c].str.strip()
         if dir_config:
             cbs.to_csv(path_cbs, index=False)
     wp = {}
     wp['any'] = {
         wp.lower():gm
         for wp, gm in zip(cbs.Woonplaatsen, cbs.Naam_2)
         if list(cbs.Woonplaatsen).count(wp) == 1
     }
     gm = {}
     gm['any'] = {gm.lower():gm for gm in set(cbs.Naam_2)}
     for k, v in self.recode_gem.items():
         gm['any'][k] = v
     for province in set(cbs.Naam_4):
         subset = cbs[cbs.Naam_4 == province]
         key = province.lower()
         gm[key] = {gm.lower():gm for gm in set(subset.Naam_2)}
         for k, v in self.recode_gem.items():
             gm[key][k] = v
         wp[key] = {
             wp.lower():gm
             for wp, gm in zip(subset.Woonplaatsen, subset.Naam_2)
             if list(subset.Woonplaatsen).count(wp) == 1
         }
     remove = list(set(cbs.Naam_4.str.lower())) + self.remove
     remove = [s for s in remove if s not in ['utrecht', 'groningen']]
     return wp, gm, self.replace, remove
Esempio n. 12
0
def test_select_n_cols():

    default_sel_filt = cbsodata.get_info('82070ENG')['DefaultSelection']
    filters_and_selections = default_sel_filt.split("&")

    for fs in filters_and_selections:
        if fs.startswith('$select='):
            select = fs[8:]

    select_list = select.split(', ')
    data = cbsodata.get_data('82070ENG', select=select_list[0:2])

    assert len(data[0].keys()) == 2
    assert len(data[5].keys()) == 2
    assert len(data[10].keys()) == 2
Esempio n. 13
0
def get_and_combine_cbs_tables(dict_tables,
                               double_trouble_colnames=None,
                               url='opendata.cbs.nl'):
    """
    Method to get multiple similar tables in the CBS database.

    Parameters
    ----------
    dict_tables : dict(str: str)
        Dictionary with as key the period and as value the table name
    double_trouble_colnames : dict(str: str)
        double_trouble_colnames: Dictionary with columnnames that will cause trouble if the suffix is deleted
    url : str
        URL of the catalog of the CBS databases, i.e.: 'opendata.cbs.nl'

    Returns
    -------
    pd.DataFrame with cbs data
    """

    print(f"Number of tables to collect: {len(dict_tables)}")

    df = pd.DataFrame()
    for interval, table in dict_tables.items():
        print(f"Pythonic iteration {interval} for table {table}")
        try:
            df_sub = pd.DataFrame(cbsodata.get_data(table, catalog_url=url))
            if double_trouble_colnames:
                df_sub = df_sub.rename(columns=double_trouble_colnames)
            cols_wijk_stripped = [
                i.rstrip('0123456789').replace("_", "").lower()
                for i in list(df_sub.columns)
            ]
            dict_wijk_cols_renamed = {
                key: value
                for key, value in zip(iter(df_sub.columns),
                                      iter(cols_wijk_stripped))
            }
            df_sub = df_sub.rename(columns=dict_wijk_cols_renamed)
            df_sub['interval'] = interval
            # print(list(df_sub.columns))
        except Exception:
            df_sub = pd.DataFrame()
            pass
        df = pd.concat([df, df_sub], sort=True)
        # print(list(df.columns))
    return df
Esempio n. 14
0
def cbsmortalitydisplacement():
    cbstable = '70895ned'
    info = cbsodata.get_info(cbstable)

    modified = info['Modified']

    cachefile = cachedir / f"{modified.replace(':', '_')}-cbsmortdispl.json"

    if cachefile.exists():
        with open(cachefile) as fh:
            return json.load(fh)

    data = cbsodata.get_data(cbstable)

    with open(cachefile, 'w') as fh:
        json.dump(data, fh)

    return data
Esempio n. 15
0
def process_data_wmo(CBS_codering, name):
    data = cbsodata.get_data(CBS_codering)
    collecting = {}
    for row in reversed(data):
        if len(row['Perioden']) != 4 or len(row['Codering_3'].rstrip()) == 0:
            continue
        identifier = int('1' + row['Codering_3'][2:])
        Financieringsvorm = row['Financieringsvorm']
        if identifier in collecting:
            result = collecting[identifier]
            result[Financieringsvorm] = row['WmoClienten_5']
            result[Financieringsvorm +
                   ' per100Inwoners_6'] = row['WmoClientenPer1000Inwoners_6']
        else:
            row[Financieringsvorm] = row['WmoClienten_5']
            row[Financieringsvorm +
                ' per100Inwoners_6'] = row['WmoClientenPer1000Inwoners_6']
            collecting[identifier] = row
    df = pd.DataFrame.from_dict(collecting, orient='index', dtype=None)
    df.rename(columns=translator, inplace=True)
    return df
Esempio n. 16
0
def process_data_jongeren(CBS_codering, name):
    data = cbsodata.get_data(CBS_codering)
    collecting = {}
    for row in data:
        if len(row['Perioden']) != 4:
            continue
        identifier = int('1' + row['Codering_3'][2:])
        if 'VormenVanJeugdzorg' in row.keys():
            TypeJeugdzorg = row['VormenVanJeugdzorg']
        else:
            TypeJeugdzorg = row['TypeJeugdzorg']
        if identifier in collecting:
            result = collecting[identifier]
            result[TypeJeugdzorg] = row['TotaalJongerenMetJeugdzorg_5']
        else:
            row[TypeJeugdzorg] = row['TotaalJongerenMetJeugdzorg_5']
            row['Gemeentenaam_1'] = row['Gemeentenaam_1'].strip()
            row['Wijken'] = row['Wijken'].strip()
            collecting[identifier] = row
    df = pd.DataFrame.from_dict(collecting, orient='index', dtype=None)
    df.rename(columns=translator, inplace=True)
    df.loc[df['Codering'].str.contains('NL01', regex=False),
           ['Codering']] = 'NL00'
    return df
Esempio n. 17
0
# -*- coding: utf-8 -*-
"""
Created on Fri Mar 29 16:45:06 2019

@author: Buf
"""
import cbsodata as cbs
import pandas as pd

try:
    pd.read_csv('data.csv')
except IOError:
    pd.DataFrame(cbs.get_data('83021ENG')).to_csv('data.csv')
df_orig = pd.read_csv('data.csv').drop('Unnamed: 0', 1)

try:
    pd.read_csv('metadata.csv')
except IOError:
    pd.DataFrame(cbs.get_meta('83021ENG', 'DataProperties',
                              catalog_url=None)).to_csv('metadata.csv')
metadata = pd.read_csv('metadata.csv')[['Key', 'Title',
                                        'Description']].set_index('Key')

selection1 = [
    'Drinkers_17',
    'Smokers_1',
    'NormalWeight_50',
    'LastMonth_44',  # Drug use
    'LastYear_45',  # Drug use
    'Ever_46',  # Drug use
    'WeeklySporter_67'
Esempio n. 18
0
https://www.cbs.nl/nl-nl/onze-diensten/open-data
Auteur: Jolien Oomens
Centraal Bureau voor de Statistiek

In dit voorbeeld worden gemeentegrenzen gekoppeld aan geboortecijfers om een 
thematische kaart te maken.
"""

import pandas as pd
import geopandas as gpd
import cbsodata

# Haal alle geboortecijfers op en filter op gemeenten
data = pd.DataFrame(
    cbsodata.get_data(
        '83765NED',
        select=['WijkenEnBuurten', 'Codering_3', 'GeboorteRelatief_25']))
data['Codering_3'] = data['Codering_3'].str.strip()

# De geodata wordt via de API van het Nationaal Georegister van PDOK opgehaald.
# Een overzicht van beschikbare data staat op https://www.pdok.nl/datasets.
geodata_url = "https://geodata.nationaalgeoregister.nl/cbsgebiedsindelingen/wfs?request=GetFeature&service=WFS&version=2.0.0&typeName=cbs_gemeente_2017_gegeneraliseerd&outputFormat=json"
gemeentegrenzen = gpd.read_file(geodata_url)

gemeentegrenzen = pd.merge(gemeentegrenzen,
                           data,
                           left_on="statcode",
                           right_on="Codering_3")

p = gemeentegrenzen.plot(column='GeboorteRelatief_25', figsize=(10, 8))
p.axis('off')
Esempio n. 19
0
def retreive_data_cbs(data):
    df = pd.DataFrame(cbsodata.get_data(data))
    return df
import pandas as pd
import cbsodata
from datetime import date
from pathlib import Path

cachestamp_week = date.today().strftime('%G-%V')
cbsfile = Path(f'cache/{cachestamp_week}-83474NED')

if cbsfile.exists():
    df_83474NED = pd.read_json(cbsfile)
else:
    df_83474NED = pd.DataFrame(cbsodata.get_data('83474NED'))
    df_83474NED.to_json(cbsfile)

popsize = df_83474NED.iloc[-1]['BevolkingAanHetEindVanDePeriode_8']

df_dashboard = pd.read_csv(
    'https://raw.githubusercontent.com/Sikerdebaard/netherlands-vaccinations-scraper/main/people-vaccinated.csv',
    index_col=0)
df_dashboard.index = pd.to_datetime(df_dashboard.index)

manual_points = []
for idx, row in df_dashboard.iterrows():
    vaccinations = row['total_vaccinations']
    date = idx.date()
    data = {
        'date': date,
        'total_vaccinations': vaccinations,
        'people_vaccinated': vaccinations,
        'total_vaccinations_per_hundred': vaccinations / popsize * 100,
        'people_vaccinated_per_hundred': vaccinations / popsize * 100,
Esempio n. 21
0
bw_shp = gpd.read_file('zip:///' + bw_file_name + '!uitvoer_shape/buurt_2013.shp')

# for future reference: read straight from url
# http://andrewgaidus.com/Reading_Zipped_Shapefiles/

# data on regional division 2013
cbs_tables = cbsodata.get_table_list()

def find_identifier(cbs_tables, search_list):
    for tbl in cbs_tables:
        if all(x in tbl['Title'] for x in search_list):
            identifier = tbl['Identifier']
    return identifier

gebieden_2013_id = find_identifier(cbs_tables, ['Gebieden', '2013'])
gebieden_2013 = pd.DataFrame(cbsodata.get_data(gebieden_2013_id))


'''
Data wrangling
'''

# stadsgewesten columns
sg_names = ['Code_1', 'Naam_5', 'Code_6', 'Naam_45', 'Code_46']

# clean up and create legible names
stadsgewesten = gebieden_2013[sg_names]. \
                apply( lambda x: x.str.rstrip() ). \
                rename(columns = {
                    'Naam_5' : 'Corop_naam',
                    'Code_6' : 'Corop_code',
Esempio n. 22
0
def generate():
    data = cbsodata.get_data('7461bev')
    df = pd.DataFrame(data)

    if df[df['Perioden'] == '2021'].shape[0] == 0:
        df = df[df['Perioden'] == '2020']
    else:
        df = df[df['Perioden'] == '2021']

    df = df[df['BurgerlijkeStaat'] == 'Totaal burgerlijke staat']
    df = df[df['Geslacht'] == 'Totaal mannen en vrouwen']

    ages = [f'{x} jaar' for x in range(0, 100)] + ['100 jaar of ouder']
    df = df[df['Leeftijd'].isin(ages)]
    df['Leeftijd'] = [int(x.split(' ')[0]) for x in df['Leeftijd'].values]

    popsize = df['Bevolking_1'].sum().astype(int)
    grownups = df[df['Leeftijd'] >= 18]['Bevolking_1'].sum().astype(int)
    children = df[df['Leeftijd'] < 18]['Bevolking_1'].sum().astype(int)
    vaccgroup = df[df['Leeftijd'] >= 12]['Bevolking_1'].sum().astype(int)

    assert (grownups + children) == popsize

    df_model = pd.read_csv('data/ensemble.csv', index_col=0)
    df_model.index = pd.to_datetime(df_model.index)
    df_model.sort_index(inplace=True)

    latest_idx = df_model['people_vaccinated'].index[-1]
    latest_people_vaccinated = df_model.loc[latest_idx]['people_vaccinated']
    latest_people_fully_vaccinated = df_model.loc[latest_idx][
        'people_fully_vaccinated']

    latest_people_vaccinated, latest_idx

    pct_vaccinated = latest_people_vaccinated / popsize * 100
    pct_grownups = latest_people_vaccinated / grownups * 100
    pct_vaccgroup = latest_people_vaccinated / vaccgroup * 100

    if pct_vaccgroup > 100:
        pct_vaccgroup = 100

    pct_fully_vaccinated = latest_people_fully_vaccinated / popsize * 100
    pct_fully_grownups = latest_people_fully_vaccinated / grownups * 100
    pct_fully_vaccgroup = latest_people_fully_vaccinated / vaccgroup * 100

    if pct_fully_vaccgroup > 100:
        pct_fully_vaccgroup = 100

    twitter_handles = ['@covid_nl', '@kalahiri', '@YorickB']
    two_random_handles = random.sample(twitter_handles, 2)
    headers = [
        f'🧙✨🔮 Het orakel kijkt diep in de kristallen bol en ziet daar een grafiek verschijnen.',
        f'🧙✨🔮 Rook, vuur en lasereffecten! Het orakel verschijnt.',
        f'🧙✨🔮 Het orakel kijkt diep in de kristallen bol. {two_random_handles[0]} denkt er zo over en {two_random_handles[1]} weer net wat anders.',
    ]

    tweet = f"""{random.choice(headers)}

Gevaccineerd met ten minste een dosis:
{progressbar(pct_vaccinated)} van de Nederlanders

{progressbar(pct_vaccgroup)} van de 12+-ers""".strip()

    tweet2 = f"""
Volledig gevaccineerd, alle doses van een vaccin ontvangen:
{progressbar(pct_fully_vaccinated)} van de Nederlanders

{progressbar(pct_fully_vaccgroup)} van de 12+-ers""".strip()

    shutdownmsg = """Deze bot wordt op 19 december uitgeschakeld. Volg @yorickb @kalahiri @covid_nl voor meer updates."""

    return [
        tweet, tweet2,
        _latest_year_tweet(),
        'https://twitter.com/vaccinorakel/status/1367747721671675910',
        shutdownmsg
    ], [[]]
Esempio n. 23
0
"""
Voorbeelden gebruik van CBS Open Data v3 in Python
https://www.cbs.nl/nl-nl/onze-diensten/open-data
Auteur: Jolien Oomens
Centraal Bureau voor de Statistiek

Minimale voorbeelden van het ophalen van een tabel, het koppelen van metadata
en het filteren van data voor het downloaden.
"""

import pandas as pd
import cbsodata

# Downloaden van tabeloverzicht
toc = pd.DataFrame(cbsodata.get_table_list())

# Downloaden van gehele tabel (kan een halve minuut duren)
data = pd.DataFrame(cbsodata.get_data('83765NED'))
print(data.head())

# Downloaden van metadata
metadata = pd.DataFrame(cbsodata.get_meta('83765NED', 'DataProperties'))
print(metadata[['Key', 'Title']])

# Downloaden van selectie van data
data = pd.DataFrame(
    cbsodata.get_data('83765NED',
                      filters="WijkenEnBuurten eq 'GM0363    '",
                      select=['WijkenEnBuurten', 'AantalInwoners_5']))
print(data.head())
Esempio n. 24
0
 def download(data_id: str, output_file: Path) -> None:
     if Path(output_file).exists():
         return
     df = pd.DataFrame(cbsodata.get_data(data_id))
     df.to_csv(output_file, index=False)
Esempio n. 25
0
"""
Voorbeelden gebruik van CBS Open Data v3 in Python
https://www.cbs.nl/nl-nl/onze-diensten/open-data
Auteur: Jolien Oomens
Centraal Bureau voor de Statistiek

In dit voorbeeld worden cijfers over de opbrengst van toeristenbelasting per 
jaar opgehaald en weergegeven in een grafiek.
"""

import pandas as pd
import cbsodata

# Download data en selecteer jaarcijfers
data = pd.DataFrame(
    cbsodata.get_data('84120NED',
                      filters="BelastingenEnWettelijkePremies eq 'A045081'"))
data = data[data['Perioden'].str.match("^\d{4}$")]

# Plot een grafiek
p = data.plot(x='Perioden',
              y='OntvangenBelastingenEnWettPremies_1',
              legend=False)
p.set_title("Opbrengst toeristenbelasting per jaar")
p.set_ylim([0, 250])
p.set_xlabel("")
p.set_ylabel("mln euro")
Esempio n. 26
0
import pandas as pd
import cbsodata
import csv
from sqlalchemy import create_engine
import getpass
import os
import datetime
import sys

#Downloading entire dataset
deathdata = pd.DataFrame(cbsodata.get_data('7052eng'))
facilitiesdata = pd.DataFrame(cbsodata.get_data('7042eng'))

#Downloading metadata
deathmetadata = pd.DataFrame(cbsodata.get_meta('7052eng', 'DataProperties'))
facilitiesmetadata = pd.DataFrame(
    cbsodata.get_meta('7042eng', 'DataProperties'))


#link python to mysql and load data
class datamanager:

    #build connnection to mysql database
    #def __init__ (self, database, username=input('insert username'), password=getpass.getpass('insert password')):
    def __init__(self, database):
        f = os.path.join(os.path.expanduser('~'), 'data_analytics_project',
                         'Scripts', 'authentication.txt')
        F = open(f)
        lines = F.readlines()
        self.username = lines[0].strip()
        self.password = lines[1].strip()
"""
  Name     : c3_14_cbsodata.py
  Book     : Hands-on Data Science with Anaconda)
  Publisher: Packt Publishing Ltd. 
  Author   : Yuxing Yan and James Yan
  Date     : 1/15/2018
  email    : [email protected]
             [email protected]
"""

import pandas as pd
import cbsodata as cb
name = '82070ENG'
data = pd.DataFrame(cb.get_data(name))
print(data.head())
info = cb.get_info(name)
print(info['Title'])
Esempio n. 28
0
def read_population_data_xlsx(source, name):
    wijken = defaultdict(str)
    codering = defaultdict(str)
    year = int(''.join([x for x in name if x.isdigit()]))
    cbs_bron = source_translator[year]
    data_buurtenwijken = cbsodata.get_data(cbs_bron)
    for x in data_buurtenwijken:
        wijk = x['WijkenEnBuurten'].strip()
        gemeente = x['Gemeentenaam_1'].strip()
        if x['Codering_3'][0:2] == 'WK':
            wijken[(gemeente, wijk)] = x['Codering_3']
            codering[x['Codering_3']] = (gemeente, wijk)

    ##HIER LADEN WE DE BEWONERSDATA 2018 IN DE DATABASE, NOG IN SQL SERVER ZETTEN
    ## This file needs the document  "kale bron 2015.xlsx" to be in the same folder to run.

    population_data = pd.read_excel(source)
    population_data['Codering'] = population_data.apply(lambda x: wijken[
        (x['GemeenteGBA'], x['GWBnaam' + str(year)].strip())],
                                                        axis=1)
    population_data['0tm17'] = 0
    population_data['18tm65'] = 0
    population_data['66tm74'] = 0
    population_data['75tm84'] = 0
    population_data['85'] = 0
    population_data['totaal'] = 0
    population_data['20tm65'] = 0
    population_data['15tm75'] = 0

    def optellen(wijk):
        if year == 2015:
            leeftijd = wijk['Leeftijd']
        else:
            leeftijd = wijk['jaren']
        aantal_bewoners = wijk['Aantal']
        wijk['totaal'] += aantal_bewoners
        if leeftijd < 18:
            wijk['0tm17'] += aantal_bewoners
        elif leeftijd < 66:
            wijk['18tm65'] += aantal_bewoners
        elif leeftijd < 75:
            wijk['66tm74'] += aantal_bewoners
        elif leeftijd < 85:
            wijk['75tm84'] += aantal_bewoners
        else:
            wijk['85'] += aantal_bewoners
        if leeftijd < 66 and leeftijd > 19:
            wijk['20tm65'] += aantal_bewoners
        if leeftijd < 76 and leeftijd > 14:
            wijk['15tm75'] += aantal_bewoners
        return wijk

    population_data = population_data.apply(optellen, axis=1)
    population_data = population_data[[
        'totaal', '0tm17', '18tm65', '66tm74', '75tm84', '85', '20tm65',
        '15tm75', 'Codering'
    ]].groupby(['Codering']).sum()
    population_data['Codering'] = population_data.index
    population_data = population_data.reset_index(drop=True)
    population_data = population_data[1:]
    population_data['Gemeentecode'] = population_data['Codering'].apply(
        lambda x: 'GM' + x[2:6])
    NL = population_data.sum()
    NL['Codering'] = 'NL00'
    NL['Gemeentecode'] = 'NL00'
    GM = population_data.groupby('Gemeentecode').sum()
    GM['Codering'] = GM.index
    GM = GM.reset_index(drop=True)
    result = pd.concat([population_data, GM], ignore_index=True)
    result = result.append(NL, ignore_index=True)
    #result.index = result['Codering'].map(lambda x: int('1' + x[2:]))
    #result.rename({'Codering_3':'Codering'},inplace = True)
    #result.drop('codering',inplace = True)
    return result
Esempio n. 29
0
from pathlib import Path
from datetime import date
from pathlib import Path

cachekey = date.today().strftime('%G-%V')

output_path = Path('./html/')

try:
    cbscachefile = Path('cache') / f'{cachekey}-83474NED'

    if cbscachefile.exists():
        cbs_df = pd.read_json(cbscachefile)
        print('Using cached 83474NED')
    else:
        cbs_df = pd.DataFrame(cbsodata.get_data('83474NED'))
        cbs_df.to_json(cbscachefile)

    by_year = {}
    by_month = {}

    for idx, row in cbs_df.iterrows():
        try:
            year, month = row['Perioden'].split(' ')
        except ValueError as e:
            continue

        if month not in by_month:
            by_month[month] = {}
        by_month[month][year] = row['LevendGeborenKinderen_2']
def retreive_data_cbs():
    df = pd.DataFrame(cbsodata.get_data('83625ENG'))
    return df