def main():
    """Command-line entry point that builds the prestations sociales data.

    Parses ``--verbose`` and ``--force``, configures logging on standard
    output, reads ``prestations_sociales_directory`` from the ``data``
    section of the configuration and, unless the HDF file already exists and
    ``--force`` was not given, calls
    ``create_prestations_sociales_data_frames()``.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument('-v', '--verbose', action='store_true', default=False,
                     help="increase output verbosity")
    cli.add_argument('-f', '--force', action='store_true', default=False,
                     help="Force overwrite of existing data")
    options = cli.parse_args()

    # Verbose mode lowers the threshold to DEBUG; otherwise only warnings show.
    if options.verbose:
        log_level = logging.DEBUG
    else:
        log_level = logging.WARNING
    logging.basicConfig(level=log_level, stream=sys.stdout)

    directory = Config().get('data', 'prestations_sociales_directory')
    assert directory != 'None', \
        "Set prestations_sociales_directory in the data section of you config[_local].ini file to a valid directory"

    hdf_file_path = os.path.join(directory, 'prestations_sociales.h5')
    # Refuse to clobber an existing file unless explicitly asked to.
    if os.path.exists(hdf_file_path) and not options.force:
        log.error(
            "The file {} already exists. Use the --force to overwrite.".format(hdf_file_path))
        return

    create_prestations_sociales_data_frames()
def main():
    """Command-line entry point that parses the prelevements sociaux data.

    Parses ``--verbose`` and ``--force``, configures logging on standard
    output, checks that both ``prelevements_sociaux_source`` and
    ``prelevements_sociaux_directory`` are configured, creates the ``clean``
    sub-directory when missing and calls ``main_parse()`` unless one of the
    output CSV files already exists and ``--force`` was not given.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument('-v', '--verbose', action = 'store_true', default = False,
        help = "increase output verbosity")
    cli.add_argument('-f', '--force', action = 'store_true', default = False,
        help = "Force overwrite of existing data")
    options = cli.parse_args()

    if options.verbose:
        log_level = logging.DEBUG
    else:
        log_level = logging.WARNING
    logging.basicConfig(level = log_level, stream = sys.stdout)

    settings = Config()
    prelevements_sociaux_source = settings.get('data', 'prelevements_sociaux_source')
    prelevements_sociaux_directory = settings.get('data', 'prelevements_sociaux_directory')
    assert prelevements_sociaux_source != 'None', \
        "Set prelevements_sociaux_source in the data section of you config[_local].ini file to a valid directory"
    assert prelevements_sociaux_directory != 'None', \
        "Set prelevements_sociaux_directory in the data section of you config[_local].ini file to a valid directory"

    # Output files live in a 'clean' sub-directory, created on first use.
    clean_directory = os.path.join(prelevements_sociaux_directory, 'clean')
    if not os.path.exists(clean_directory):
        os.makedirs(clean_directory)

    file_path1 = os.path.join(clean_directory, 'recette_csg_crds.csv')
    file_path2 = os.path.join(clean_directory, 'recette_csg_by_type.csv')

    # Either output being present is enough to require --force.
    output_present = os.path.exists(file_path1) or os.path.exists(file_path2)
    if output_present and not options.force:
        log.error("The files {} and/or {} already exist. Use the --force to overwrite.".format(file_path1, file_path2))
        return

    main_parse()
コード例 #3
0
def load_actual_data(year=None):
    """Assemble the actual amounts and beneficiary counts for one year.

    The CSG/CRDS CSV files are optional: if anything goes wrong while reading
    them, the three CSG/CRDS tables are left out and only the prestations
    sociales data are used. The minimum vieillesse beneficiaries file is
    optional as well. The two prestations sociales CSV files are required.

    Parameters
    ----------
    year : int or str
        Year to extract; ``str(year)`` is used as the column label looked up
        in the concatenated tables.

    Returns
    -------
    pandas.DataFrame
        A frame with the columns ``actual_amount`` and
        ``actual_beneficiaries``.

    Raises
    ------
    AssertionError
        If ``year`` is None.
    """
    assert year is not None
    parser = Config()
    # Cotisations CSG -CRDS
    try:
        directory = os.path.join(
            parser.get('data', 'prelevements_sociaux_directory'),
            'clean',
        )
        # The CSG/CRDS tables are all divided by 1e6 after loading.
        csg_crds_amounts = pd.read_csv(
            os.path.join(directory, 'recette_csg_crds.csv'),
            index_col=0).rename(dict(
                recette_csg='csg',
                recette_crds='crds',
            )) / 1e6
        csg_by_type_amounts = pd.read_csv(
            os.path.join(directory, 'recette_csg_by_type.csv'),
            index_col=0,
        ).drop(['source']).astype(float) / 1e6
        assiette_csg_by_type_amounts = pd.read_csv(
            os.path.join(directory, 'assiette_csg_by_type.csv'),
            index_col=0,
        ) / 1e6
    except Exception:
        # Best effort: these inputs are optional. ``Exception`` (rather than a
        # bare ``except``) keeps KeyboardInterrupt/SystemExit propagating.
        assiette_csg_by_type_amounts = None
        csg_by_type_amounts = None
        csg_crds_amounts = None
    # Prestations sociales
    directory = os.path.join(
        parser.get('data', 'prestations_sociales_directory'),
        'clean',
    )
    amounts_csv = os.path.join(directory, 'historique_depenses.csv')
    beneficiaries_csv = os.path.join(directory, 'historique_beneficiaires.csv')
    prestations_sociales_amounts = pd.read_csv(amounts_csv, index_col=0)
    prestations_sociales_beneficiaries = pd.read_csv(beneficiaries_csv,
                                                     index_col=0)
    # Minimum vieillesses
    minimum_vieillesse_beneficiaries_csv = os.path.join(
        directory, 'historique_beneficiaires_minimum_vieillesse.csv')
    # Default to None so that a missing file no longer leaves the name unbound
    # (pd.concat silently drops None entries).
    minimum_vieillesse_beneficiaries = None
    if os.path.exists(minimum_vieillesse_beneficiaries_csv):
        minimum_vieillesse_beneficiaries = pd.read_csv(
            minimum_vieillesse_beneficiaries_csv, index_col=0)

    amounts = pd.concat([
        assiette_csg_by_type_amounts,
        csg_by_type_amounts,
        csg_crds_amounts,
        prestations_sociales_amounts,
    ])
    beneficiaries = pd.concat(
        [minimum_vieillesse_beneficiaries, prestations_sociales_beneficiaries])

    return pd.DataFrame(
        data={
            "actual_amount": amounts[str(year)],
            "actual_beneficiaries": beneficiaries[str(year)],
        })
コード例 #4
0
def main():
    """Command-line entry point that builds the prestations sociales data.

    Parses ``--verbose`` and ``--force``, configures logging on standard
    output, reads ``prestations_sociales_directory`` from the configuration
    and calls ``create_prestations_sociales_data_frames()`` unless the HDF
    file already exists and ``--force`` was not given.
    """
    cli = argparse.ArgumentParser()
    # NOTE: the --start / --end year options are currently disabled.
    cli.add_argument('-v', '--verbose', action = 'store_true', default = False,
        help = "increase output verbosity")
    cli.add_argument('-f', '--force', action = 'store_true', default = False,
        help = "Force overwrite of existing data")
    options = cli.parse_args()

    if options.verbose:
        log_level = logging.DEBUG
    else:
        log_level = logging.WARNING
    logging.basicConfig(level = log_level, stream = sys.stdout)

    directory = Config().get('data', 'prestations_sociales_directory')
    assert directory != 'None', \
        "Set prestations_sociales_directory in the data section of you config[_local].ini file to a valid directory"

    hdf_file_path = os.path.join(directory, 'prestations_sociales.h5')
    # Refuse to clobber an existing file unless explicitly asked to.
    if os.path.exists(hdf_file_path) and not options.force:
        log.error("The file {} already exists. Use the --force to overwrite.".format(hdf_file_path))
        return

    create_prestations_sociales_data_frames()
def main():
    """Command-line entry point that builds ``foyers_imposables_imposes.xls``.

    The ``denombrements_fiscaux_xls`` option of the ``data`` configuration
    section is the default for ``--target``; the Excel file is built inside
    the chosen target directory by ``build_excel``.
    """
    xls_directory = Config().get("data", "denombrements_fiscaux_xls")
    assert (
        xls_directory != "None"
    ), "Set denombrements_fiscaux_xls in the data section of your config_local.ini file to a valid directory"

    cli = argparse.ArgumentParser()
    target_help = "path where to store downloaded files (default to {})".format(xls_directory)
    cli.add_argument("-t", "--target", default=xls_directory, help=target_help)
    cli.add_argument("-v", "--verbose", action="store_true", default=False, help="increase output verbosity")
    options = cli.parse_args()

    if options.verbose:
        log_level = logging.DEBUG
    else:
        log_level = logging.WARNING
    logging.basicConfig(level=log_level, stream=sys.stdout)

    excel_path = os.path.join(options.target, "foyers_imposables_imposes.xls")
    build_excel(excel_path)
コード例 #6
0
# -*- coding: utf-8 -*-


import logging
import numpy
import os
import pandas
import pkg_resources
import re


from ipp_macro_series_parser.config import Config

# Configuration reader shared by this module.
config_parser = Config()

# Locations read from the 'data' section of the configuration.
xls_directory = config_parser.get('data', 'denombrements_fiscaux_xls')
hdf_directory = config_parser.get('data', 'denombrements_fiscaux_hdf')


# Module-level logger.
log = logging.getLogger(__name__)


def parse_ipp_denombrements():

    file_path = os.path.join(xls_directory, u'Agrégats IPP - Données fiscales.xls')

    def parse_bloc(name = None, sheetname = '2042-montant', skiprows = 0, parse_cols = None, slice_start = None,
                   slice_end = None, prefix = ''):
        assert name is not None
        df = pandas.read_excel(
            file_path,
コード例 #7
0
# -*- coding: utf-8 -*-
"""Download http://www.insee.fr/fr/ppp/bases-de-donnees/donnees-detaillees/bilan-demo/fichiers-xls/
"""

import argparse
import logging
import os
import sys
import urllib

from ipp_macro_series_parser.config import Config

# The logger is named after this script's file name, without its extension.
app_name = os.path.splitext(os.path.basename(__file__))[0]
log = logging.getLogger(app_name)

# Fail at import time when the demography directory option holds the string
# 'None' (presumably the placeholder for an unset option; verify in Config).
parser = Config()
demographie_directory = parser.get('data', 'demographie_directory')
assert demographie_directory != 'None', \
    "Set demographie_directory in the data section of you config[_local].ini file to a valid directory"


# Download the xls file from the url and unzip it in directory.
# NOTE(review): only the setup statements are visible below; the download and
# unzip steps are presumably in the remainder of the definition.
def age_structure_downloader(years=None, directory=demographie_directory):
    assert years is not None
    # A single year is accepted and wrapped into a one-element list.
    if type(years) is int:
        years = [years]
    # Create the target directory on first use.
    if not os.path.exists(directory):
        os.makedirs(directory)

    base_url = 'http://www.insee.fr/fr/ppp/bases-de-donnees/donnees-detaillees/bilan-demo/fichiers-xls/'
コード例 #8
0
# -*- coding: utf-8 -*-
"""
Created on Fri Jul 17 15:01:56 2015

@author: thomas.douenne
"""

import os
import pkg_resources
import pandas as pd
import numpy as np

from ipp_macro_series_parser.config import Config

# Configuration reader; the configuration files directory is the location
# reported by pkg_resources for the 'ipp-macro-series-parser' distribution.
parser = Config(
    config_files_directory = os.path.join(pkg_resources.get_distribution('ipp-macro-series-parser').location)
    )

# Values read from the 'data' section of the configuration.
transports_directory = parser.get('data', 'transports_directory')
a_activite_economique = parser.get('data', 'a_activite_economique')
d_developpement_durable = parser.get('data', 'd_developpement_durable')
f_voyageurs = parser.get('data', 'f_voyageurs')
g_bilan_circulation = parser.get('data', 'g_bilan_circulation')


def transports_parser(excelfile_name, onglet):
    """Read one sheet of a transports workbook into a cleaned data frame.

    The first two rows of the sheet are skipped, the ``'Unnamed: 0'`` column
    is renamed to ``'index'``, rows holding fewer than three non-missing
    values are dropped and the remaining missing values are replaced by
    ``'-'``.

    NOTE(review): recent pandas releases spell the ``sheetname`` keyword
    ``sheet_name``; confirm against the pandas version in use.
    """
    sheet = pd.read_excel(excelfile_name, sheetname = onglet, skiprows = 2)
    sheet = sheet.rename(columns = {'Unnamed: 0': 'index'})
    return sheet.dropna(thresh = 3).fillna('-')
コード例 #9
0
# -*- coding: utf-8 -*-

import logging
import os
import pandas
import numpy

from ipp_macro_series_parser.config import Config
from ipp_macro_series_parser.comptes_nationaux.parser_tee import tee_df_by_year_generator
from ipp_macro_series_parser.comptes_nationaux.parser_non_tee import non_tee_df_by_filename_generator

# Module-level logger.
log = logging.getLogger(__name__)

# Directory option 'cn_hdf_directory' read from the 'data' configuration section.
parser = Config()
hdf_directory = parser.get('data', 'cn_hdf_directory')


def cn_df_generator(year, list_years=None, drop_duplicates=True, subset=None):
    """
    Generates the table with all the data from Comptabilite Nationale.

    Parameters
    ----------
    year : int
        year of INSEE data realease
    list_years : list of integers
        list of years of interest. Optional.

    Example
    --------
    >>> year = 2013
コード例 #10
0
        for gender, dataframe in data_by_gender.items():
            dataframe.index.name = 'age'
            dataframe.columns.name = 'period'
            dataframe = dataframe.stack('period').reset_index()
            dataframe[
                'sexe'] = False if gender == 'male' else True  # homme = False, femme = True
            dataframe.rename(columns={0: 'value'}, inplace=True)
            dataframe = dataframe.set_index(['period', 'sexe', 'age'])
            assert len(dataframe.columns) == 1
            dataframes.append(dataframe)

        return pd.concat(dataframes).sort_index()


# Script mode: build one population CSV per projection hypothesis.
if __name__ == '__main__':
    config = Config()
    insee_projections_directory = config.get('data', 'insee_projections')
    # Input workbook name for each hypothesis.
    insee_2070_projections_filename_by_hypothese = {
        'centrale': 'Proj_démo_INSEE_2016_Hypothèse_centrale.xls',
        'jeune': 'Proj_démo_INSEE_2016_Population_jeune.xls',
        'vieille': 'Proj_démo_INSEE_2016_Population_vieille.xls',
    }
    for hypothese, filename in insee_2070_projections_filename_by_hypothese.items(
    ):
        input_file_path = os.path.join(insee_projections_directory, filename)
        # NOTE(review): the output root is a hard-coded, user-specific path;
        # consider reading it from the configuration instead.
        output_path = os.path.join('/home/benbel/temp', hypothese,
                                   'population.csv')
        df = build_population(input_file_path=input_file_path)
        # Called with the output directory before writing (presumably creates
        # it when missing; confirm in check_directory_existence).
        check_directory_existence(os.path.dirname(output_path))
        df.to_csv(output_path)
# -*- coding: utf-8 -*-
"""
Created on Tue Jul 21 13:57:18 2015

@author: thomas.douenne
"""

import pandas as pd

from ipp_macro_series_parser.config import Config

# Configuration reader for this module.
parser = Config()

# Values read from the 'data' section of the configuration.
transports_directory = parser.get('data', 'transports_directory')
prix_annuel_carburants = parser.get('data', 'prix_annuel_carburants_90_2014')
prix_mensuel_carburants = parser.get('data', 'prix_mensuel_carburants_90_2015')


def prix_carburants_parser(excelfile_name):
    """Read a fuel price workbook and normalise its rows and column names.

    The third row of the sheet is used as header. Fully empty rows are
    dropped, ``Date`` is converted to text, missing values are replaced by a
    three-space string, rows whose second column holds that placeholder or
    whose first column equals ``'en euro par litre'`` are removed, and the
    price columns get short ``*_ht`` / ``*_ttc`` names.

    NOTE(review): the statements visible here end without a ``return``; the
    definition may continue beyond this point.

    Parameters
    ----------
    excelfile_name : str
        Path of the Excel file to read.
    """
    data_frame = pd.read_excel(excelfile_name, header = 2)
    data_frame = data_frame.dropna(how = 'all')
    data_frame['Date'] = data_frame['Date'].astype(str)
    data_frame.fillna('   ', inplace = True)
    # Columns are addressed by position; ``.iloc`` replaces the ``.ix``
    # indexer, which no longer exists in current pandas releases.
    data_frame = data_frame[data_frame.iloc[:, 1] != '   ']
    data_frame = data_frame[data_frame.iloc[:, 0] != 'en euro par litre']
    # The '.1' suffixed labels are renamed to the *_ttc names.
    data_frame.rename(
        columns = {
            'Super carburant': 'super_plombe_ht',
            'Super carburant.1': 'super_plombe_ttc',
            'Gazole': 'diesel_ht',
            'Gazole.1': 'diesel_ttc',
            'Super SP95': 'super_95_ht',
            'Super SP95.1': 'super_95_ttc',
            },
        inplace = True,
        )
コード例 #12
0
# -*- coding: utf-8 -*-
"""
Created on Thu Jul 16 14:13:25 2015

@author: Antoine
"""

import os
import pkg_resources

from ipp_macro_series_parser.config import Config
from ipp_macro_series_parser.comptes_nationaux.parser_main import get_comptes_nationaux_data
from ipp_macro_series_parser.data_extraction import look_many

# Configuration reader; the configuration files directory is the location
# reported by pkg_resources for the 'ipp-macro-series-parser' distribution.
parser = Config(config_files_directory=os.path.join(
    pkg_resources.get_distribution('ipp-macro-series-parser').location))
# Value of the 'cn_csv_directory' option of the 'data' section.
excel_output_directory = parser.get('data', 'cn_csv_directory')


def reshape_to_long_for_output(df):
    """
    Unmelts the data, using the years as variables (columns).

    Parameters
    ----------
    df : DataFrame
        DataFrame generated by get_comptes_nationaux_data(year) and/or look_many(df, my_selection)

    Example
    --------
    >>> from ipp_macro_series_parser.comptes_nationaux.cn_parser_main import get_comptes_nationaux_data
コード例 #13
0
Created on Thu Jul 16 14:13:25 2015

@author: Antoine
"""


import os
import pkg_resources

from ipp_macro_series_parser.config import Config
from ipp_macro_series_parser.comptes_nationaux.parser_main import get_comptes_nationaux_data
from ipp_macro_series_parser.data_extraction import look_many


# Configuration reader; the configuration files directory is the location
# reported by pkg_resources for the 'ipp-macro-series-parser' distribution.
parser = Config(
    config_files_directory = os.path.join(pkg_resources.get_distribution('ipp-macro-series-parser').location)
    )
# Value of the 'cn_csv_directory' option of the 'data' section.
excel_output_directory = parser.get('data', 'cn_csv_directory')


def reshape_to_long_for_output(df):
    """
    Unmelts the data, using the years as variables (columns).

    Parameters
    ----------
    df : DataFrame
        DataFrame generated by get_comptes_nationaux_data(year) and/or look_many(df, my_selection)

    Example
    --------
コード例 #14
0
# -*- coding: utf-8 -*-

import logging
import numpy
import os
import pandas
import pkg_resources
import re

from ipp_macro_series_parser.config import Config

# Configuration reader; the configuration files directory is the location
# reported by pkg_resources for the 'ipp-macro-series-parser' distribution.
config_parser = Config(config_files_directory=os.path.join(
    pkg_resources.get_distribution('ipp-macro-series-parser').location))

# Locations read from the 'data' section of the configuration.
xls_directory = config_parser.get('data', 'denombrements_fiscaux_xls')
hdf_directory = config_parser.get('data', 'denombrements_fiscaux_hdf')

# Module-level logger.
log = logging.getLogger(__name__)


def parse_ipp_denombrements():

    file_path = os.path.join(xls_directory,
                             u'Agrégats IPP - Données fiscales.xls')

    def parse_bloc(name=None,
                   sheetname='2042-montant',
                   skiprows=0,
                   parse_cols=None,
                   slice_start=None,
                   slice_end=None,
コード例 #15
0
# -*- coding: utf-8 -*-


import logging
import os
import pandas
import pkg_resources
from ipp_macro_series_parser.config import Config

# The Excel inputs are looked up in the 'xls' sub-directory of the configured
# demography directory.
config_parser = Config()
xls_directory = os.path.join(config_parser.get('data', 'demographie_directory'), 'xls')


# Module-level logger.
log = logging.getLogger(__name__)


def create_demographie_data_frame():
    data_frame = pandas.DataFrame()
    for year in range(1999, 2015 + 1):
        file_path = os.path.join(xls_directory, u'pyramide-des-ages-{}.xls'.format(year))
        skiprows = 5 - (year == 1999)
        parse_cols = "A:E"
        slice_start = 0
        slice_end = 101
        sheetname = 'France'

        if year <= 2010:
            sheetnames = ['France', u'France métropolitaine']
        elif year == 2011:
            sheetnames = ['{} France'.format(year), u"{} métropole".format(year)]
        else:
コード例 #16
0
# -*- coding: utf-8 -*-

import logging
import os
import pandas
import pkg_resources
from ipp_macro_series_parser.config import Config

# The Excel inputs are looked up in the 'xls' sub-directory of the configured
# demography directory.
config_parser = Config()
xls_directory = os.path.join(
    config_parser.get('data', 'demographie_directory'), 'xls')

# Module-level logger.
log = logging.getLogger(__name__)


def create_demographie_data_frame():
    data_frame = pandas.DataFrame()
    for year in range(1999, 2015 + 1):
        file_path = os.path.join(xls_directory,
                                 u'pyramide-des-ages-{}.xls'.format(year))
        skiprows = 5 - (year == 1999)
        parse_cols = "A:E"
        slice_start = 0
        slice_end = 101
        sheetname = 'France'

        if year <= 2010:
            sheetnames = ['France', u'France métropolitaine']
        elif year == 2011:
            sheetnames = [
                '{} France'.format(year), u"{} métropole".format(year)
コード例 #17
0
import numpy
import os
import pandas
import pkg_resources


from py_expression_eval import Parser


from ipp_macro_series_parser.config import Config
from ipp_macro_series_parser.denombrements_fiscaux.parsers import (
    get_denombrements_fiscaux_data_frame)
from ipp_macro_series_parser.data_extraction import get_or_construct_value

# Configuration reader; the configuration files directory is the location
# reported by pkg_resources for the 'ipp-macro-series-parser' distribution.
config_parser = Config(
    config_files_directory = os.path.join(pkg_resources.get_distribution('ipp-macro-series-parser').location)
    )


def update_index_by_variable_name_appearing_in_formula(index_by_variable_name, formula):
    """Register every variable used in ``formula`` in ``index_by_variable_name``.

    The formula is parsed with ``Parser`` and each variable it reports is
    stored under its own name as ``{'code': <name>}``, overwriting any
    existing entry. The mapping is updated in place and also returned.
    """
    variable_names = Parser().parse(formula).variables()
    entries = {
        variable_name: {'code': variable_name}
        for variable_name in variable_names
        }
    index_by_variable_name.update(entries)
    return index_by_variable_name


def create_index_by_variable_name(formula_by_variable_name, level_2_formula_by_variable_name = None):
コード例 #18
0
# -*- coding: utf-8 -*-
"""
Created on Tue Jul 21 13:57:18 2015

@author: thomas.douenne
"""

import pandas as pd

from ipp_macro_series_parser.config import Config

# Configuration reader for this module.
parser = Config()

# Values read from the 'data' section of the configuration.
transports_directory = parser.get('data', 'transports_directory')
prix_annuel_carburants = parser.get('data', 'prix_annuel_carburants_90_2014')
prix_mensuel_carburants = parser.get('data', 'prix_mensuel_carburants_90_2015')


def prix_carburants_parser(excelfile_name):
    """Read a fuel price workbook and normalise its rows and column names.

    The third row of the sheet is used as header. Fully empty rows are
    dropped, ``Date`` is converted to text, missing values are replaced by a
    three-space string, rows whose second column holds that placeholder or
    whose first column equals ``'en euro par litre'`` are removed, and the
    price columns get short ``*_ht`` / ``*_ttc`` names.

    NOTE(review): the statements visible here end without a ``return``; the
    definition may continue beyond this point.

    Parameters
    ----------
    excelfile_name : str
        Path of the Excel file to read.
    """
    data_frame = pd.read_excel(excelfile_name, header=2)
    data_frame = data_frame.dropna(how='all')
    data_frame['Date'] = data_frame['Date'].astype(str)
    data_frame.fillna('   ', inplace=True)
    # Columns are addressed by position; ``.iloc`` replaces the ``.ix``
    # indexer, which no longer exists in current pandas releases.
    data_frame = data_frame[data_frame.iloc[:, 1] != '   ']
    data_frame = data_frame[data_frame.iloc[:, 0] != 'en euro par litre']
    # The '.1' suffixed labels are renamed to the *_ttc names.
    data_frame.rename(
        columns={
            'Super carburant': 'super_plombe_ht',
            'Super carburant.1': 'super_plombe_ttc',
            'Gazole': 'diesel_ht',
            'Gazole.1': 'diesel_ttc',
        },
        inplace=True)
コード例 #19
0
# -*- coding: utf-8 -*-

import os
import urllib

from ipp_macro_series_parser.config import Config

# Fail at import time when the configured transports directory does not exist.
parser = Config()
transports_directory = parser.get('data', 'transports_directory')
assert os.path.exists(
    transports_directory), "{} is not a valid directory".format(
        transports_directory)


def getunzipped(theurl, thedir, file_name):
    """Download ``theurl`` into ``thedir`` under the name ``file_name``.

    The target directory is created when missing. A failed download is
    reported on standard output and otherwise ignored (best effort).
    Despite its name, this function does not unzip anything.

    Parameters
    ----------
    theurl : str
        URL of the resource to fetch.
    thedir : str
        Directory in which the file is stored.
    file_name : str
        Name of the file written inside ``thedir``.

    Returns
    -------
    None
    """
    # ``urlretrieve`` lives in ``urllib.request`` on Python 3 and directly in
    # ``urllib`` on Python 2; resolve it here so both interpreters work.
    try:
        from urllib.request import urlretrieve
    except ImportError:
        from urllib import urlretrieve

    name = os.path.join(thedir, file_name)
    if not os.path.exists(thedir):
        os.makedirs(thedir)
    try:
        urlretrieve(theurl, name)
    except IOError as e:
        print("Can't retrieve %r to %r: %s" % (theurl, thedir, e))


# Names of the transport items to download (presumably one per remote
# resource; verify against the code that consumes this list).
to_be_downloaded = [
    'a-transport-et-activite-economique',
    'b-entreprises-francaises-de-transport', 'c-transport-emploi-remuneration',
    'd-transport-developpement-durable', 'e-transport-de-marchandises',
    'f-transport-de-voyageurs', 'g-bilan-de-circulation'
]
コード例 #20
0
# -*- coding: utf-8 -*-

import os
import pkg_resources
import pandas as pd
import numpy as np

from ipp_macro_series_parser.config import Config

# Configuration reader for this module.
parser = Config()

# Values read from the 'data' section of the configuration.
transports_directory = parser.get('data', 'transports_directory')
a_activite_economique = parser.get('data', 'a_activite_economique')
d_developpement_durable = parser.get('data', 'd_developpement_durable')
f_voyageurs = parser.get('data', 'f_voyageurs')
g_bilan_circulation = parser.get('data', 'g_bilan_circulation')


def transports_parser(excelfile_name, onglet):
    """Read one sheet of a transports workbook into a cleaned data frame.

    The first two rows of the sheet are skipped, the ``'Unnamed: 0'`` column
    is renamed to ``'index'``, rows holding fewer than three non-missing
    values are dropped and the remaining missing values are replaced by
    ``'-'``.

    NOTE(review): recent pandas releases spell the ``sheetname`` keyword
    ``sheet_name``; confirm against the pandas version in use.
    """
    sheet = pd.read_excel(excelfile_name, sheetname=onglet, skiprows=2)
    sheet = sheet.rename(columns={'Unnamed: 0': 'index'})
    return sheet.dropna(thresh=3).fillna('-')


def transports_parser_categ(excelfile_name, onglet):
    data_frame = pd.read_excel(excelfile_name, sheetname=onglet, skiprows=2)
    data_frame.rename(columns={'Unnamed: 0': 'index'}, inplace=True)
    data_frame['categorie'] = np.nan
    data_frame.loc[data_frame[2005].isnull(), 'categorie'] = \
コード例 #21
0

import pandas
import os
import pkg_resources
from pandas.util.testing import assert_frame_equal

from ipp_macro_series_parser.comptes_nationaux import parser_tee
from ipp_macro_series_parser.comptes_nationaux import parser_non_tee
from ipp_macro_series_parser.comptes_nationaux.parser_main import get_comptes_nationaux_data
from ipp_macro_series_parser.data_extraction import get_or_construct_value, get_or_construct_data
from ipp_macro_series_parser.comptes_nationaux.sheets_lists import generate_CN1_variables
from ipp_macro_series_parser.comptes_nationaux.cn_test import read_CN1, read_profits_societes, create_dict_profits

from ipp_macro_series_parser.config import Config
# Value of the 'cn_csv_directory' option of the 'data' configuration section.
parser = Config()
cn_csv = parser.get('data', 'cn_csv_directory')
# Test data directory, located relative to the location reported by
# pkg_resources for the 'ipp-macro-series-parser' distribution.
tests_data = os.path.join(
    pkg_resources.get_distribution('ipp-macro-series-parser').location,
    'ipp_macro_series_parser/tests/data')


def test_duplicate_tee_df():
    """Check that no TEE data frame generated for folder 2013 has duplicate rows."""
    folder_year = 2013
    tee_df_by_year = parser_tee.tee_df_by_year_generator(folder_year)
    for key, df in tee_df_by_year.items():
        # The message is formatted rather than concatenated: folder_year is an
        # int, so ``str + int`` raised TypeError and hid the real failure.
        assert not df.duplicated().any(), \
            "There are duplicate rows in TEE {}, in folder: comptes_annees {}".format(key, folder_year)


def test_duplicate_non_tee_df():
コード例 #22
0
# -*- coding: utf-8 -*-
"""
Created on Fri Jul 17 14:18:26 2015

@author: thomas.douenne
"""

import os
import pkg_resources
import urllib

from ipp_macro_series_parser.config import Config


# Configuration reader; the configuration files directory is the location
# reported by pkg_resources for the 'ipp-macro-series-parser' distribution.
parser = Config(
    config_files_directory = os.path.join(pkg_resources.get_distribution('ipp-macro-series-parser').location)
    )
# Value of the 'transports_directory' option of the 'data' section.
transports_directory = parser.get('data', 'transports_directory')


def getunzipped(theurl, thedir, file_name):
    """Download ``theurl`` into ``thedir`` under the name ``file_name``.

    The target directory is created when missing. A failed download is
    reported on standard output and otherwise ignored (best effort).
    Despite its name, this function does not unzip anything.

    Parameters
    ----------
    theurl : str
        URL of the resource to fetch.
    thedir : str
        Directory in which the file is stored.
    file_name : str
        Name of the file written inside ``thedir``.

    Returns
    -------
    None
    """
    # ``urlretrieve`` lives in ``urllib.request`` on Python 3 and directly in
    # ``urllib`` on Python 2; resolve it here so both interpreters work.
    try:
        from urllib.request import urlretrieve
    except ImportError:
        from urllib import urlretrieve

    name = os.path.join(thedir, file_name)
    if not os.path.exists(thedir):
        os.makedirs(thedir)
    try:
        urlretrieve(theurl, name)
    except IOError as e:
        # Parenthesised single-argument print behaves the same on Python 2 and 3.
        print("Can't retrieve %r to %r: %s" % (theurl, thedir, e))

to_be_downloaded = ['a-transport-activite-economique', 'b-entreprises',
コード例 #23
0
# -*- coding: utf-8 -*-

import os

from ipp_macro_series_parser.config import Config
from ipp_macro_series_parser.comptes_nationaux.parser_main import get_comptes_nationaux_data
from ipp_macro_series_parser.data_extraction import look_many

# Value of the 'cn_csv_directory' option of the 'data' configuration section.
parser = Config()
excel_output_directory = parser.get('data', 'cn_csv_directory')


def reshape_to_long_for_output(df):
    """
    Unmelts the data, using the years as variables (columns).

    Parameters
    ----------
    df : DataFrame
        DataFrame generated by get_comptes_nationaux_data(year) and/or look_many(df, my_selection)

    Example
    --------
    >>> from ipp_macro_series_parser.comptes_nationaux.cn_parser_main import get_comptes_nationaux_data
    >>> from ipp_macro_series_parser.data_extraction import look_many
    >>> table2013 = get_comptes_nationaux_data(2013)
    >>> my_selection = [{'code': None, 'institution': 'S1', 'ressources': False,
    ...             'description': 'PIB'},
    ...             {'code': None, 'institution': 'S1', 'ressources': False,
    ...             'description': 'PIN'}]
    >>> df = look_many(table2013, my_selection)
コード例 #24
0
# -*- coding: utf-8 -*-
"""Parse dénombrements fiscaux to produce the dataframe stroed in a HDF5 file
"""

import argparse
import logging
import os
import sys

from ipp_macro_series_parser.config import Config
from ipp_macro_series_parser.denombrements_fiscaux.denombrements_parsers import (
    create_denombrements_fiscaux_data_frame)
# The logger is named after this script's file name, without its extension.
app_name = os.path.splitext(os.path.basename(__file__))[0]
log = logging.getLogger(app_name)

# Input and output locations read from the 'data' configuration section.
parser = Config()
denombrements_fiscaux_xls_directory = parser.get('data',
                                                 'denombrements_fiscaux_xls')
denombrements_fiscaux_hdf_directory = parser.get('data',
                                                 'denombrements_fiscaux_hdf')

# Import-time sanity checks: the xls option must not hold the string 'None'...
assert denombrements_fiscaux_xls_directory != 'None', \
    "Set denombrements_fiscaux_xls in the data section of you config[_local].ini file to a valid directory"

# ...and the three expected inputs must exist inside that directory.
assert os.path.exists(os.path.join(denombrements_fiscaux_xls_directory, 'D2042Nat')), \
    "The D2042Nat containing the DGFiP files doesn't exist"
assert os.path.exists(os.path.join(denombrements_fiscaux_xls_directory, '2042_national.xls')), \
    "The 2042_national.xls containing the openfisca data doesn't exist"
assert os.path.exists(os.path.join(denombrements_fiscaux_xls_directory, 'Agrégats IPP - Données fiscales.xls')), \
    "Agrégats IPP - Données fiscales.xls containing the openfisca data doesn't exist"
コード例 #25
0
def load_actual_data(year = None):
    """Assemble the actual amounts and beneficiary counts for one year.

    The CSG/CRDS CSV files are optional: if anything goes wrong while reading
    them, the three CSG/CRDS tables are left out and only the prestations
    sociales data are used. The minimum vieillesse beneficiaries file is
    optional as well. The two prestations sociales CSV files are required.

    Parameters
    ----------
    year : int or str
        Year to extract; ``str(year)`` is used as the column label looked up
        in the concatenated tables.

    Returns
    -------
    pandas.DataFrame
        A frame with the columns ``actual_amount`` and
        ``actual_beneficiaries``.

    Raises
    ------
    AssertionError
        If ``year`` is None.
    """
    assert year is not None
    parser = Config()
    # Cotisations CSG -CRDS
    try:
        directory = os.path.join(
            parser.get('data', 'prelevements_sociaux_directory'),
            'clean',
            )
        # The CSG/CRDS tables are all divided by 1e6 after loading.
        csg_crds_amounts = pd.read_csv(
            os.path.join(directory, 'recette_csg_crds.csv'),
            index_col = 0
            ).rename(
                dict(
                    recette_csg = 'csg',
                    recette_crds = 'crds',
                    )
                ) / 1e6
        csg_by_type_amounts = pd.read_csv(
            os.path.join(directory, 'recette_csg_by_type.csv'),
            index_col = 0,
            ).drop(
                ['source']
                ).astype(float) / 1e6
        assiette_csg_by_type_amounts = pd.read_csv(
            os.path.join(directory, 'assiette_csg_by_type.csv'),
            index_col = 0,
            ) / 1e6
    except Exception:
        # Best effort: these inputs are optional. ``Exception`` (rather than a
        # bare ``except``) keeps KeyboardInterrupt/SystemExit propagating.
        assiette_csg_by_type_amounts = None
        csg_by_type_amounts = None
        csg_crds_amounts = None
    # Prestations sociales
    directory = os.path.join(
        parser.get('data', 'prestations_sociales_directory'),
        'clean',
        )
    amounts_csv = os.path.join(directory, 'historique_depenses.csv')
    beneficiaries_csv = os.path.join(directory, 'historique_beneficiaires.csv')
    prestations_sociales_amounts = pd.read_csv(amounts_csv, index_col = 0)
    prestations_sociales_beneficiaries = pd.read_csv(beneficiaries_csv, index_col = 0)
    # Minimum vieillesses
    minimum_vieillesse_beneficiaries_csv = os.path.join(
        directory, 'historique_beneficiaires_minimum_vieillesse.csv')
    # Default to None so that a missing file no longer leaves the name unbound
    # (pd.concat silently drops None entries).
    minimum_vieillesse_beneficiaries = None
    if os.path.exists(minimum_vieillesse_beneficiaries_csv):
        minimum_vieillesse_beneficiaries = pd.read_csv(minimum_vieillesse_beneficiaries_csv, index_col = 0)

    amounts = pd.concat(
        [
            assiette_csg_by_type_amounts,
            csg_by_type_amounts,
            csg_crds_amounts,
            prestations_sociales_amounts,
            ],
        sort = True,
        )
    beneficiaries = pd.concat(
        [minimum_vieillesse_beneficiaries, prestations_sociales_beneficiaries],
        sort = True,
        )

    return pd.DataFrame(data = {
        "actual_amount": amounts[str(year)],
        "actual_beneficiaries": beneficiaries[str(year)],
        })
コード例 #26
0
# -*- coding: utf-8 -*-

import os
import pandas
import pkg_resources
from ipp_macro_series_parser.config import Config
from pandas.util.testing import assert_frame_equal

from ipp_macro_series_parser.comptes_nationaux.parser_main import get_comptes_nationaux_data
from ipp_macro_series_parser.data_extraction import (look_many, look_up,
                                                     get_or_construct_value,
                                                     get_or_construct_data)
from ipp_macro_series_parser.comptes_nationaux.sheets_lists import generate_CN1_variables, generate_CN2_variables

# Configuration reader; the configuration files directory is the location
# reported by pkg_resources for the 'ipp-macro-series-parser' distribution.
parser = Config(config_files_directory=os.path.join(
    pkg_resources.get_distribution('ipp-macro-series-parser').location))
# Values read from the 'data' section of the configuration.
cn_directory = parser.get('data', 'cn_directory')
cn_hdf = parser.get('data', 'cn_hdf_directory')
cn_csv = parser.get('data', 'cn_csv_directory')
tests_directory = parser.get('data', 'tests_directory')

# Test data directory, located relative to the distribution location.
tests_data = os.path.join(
    pkg_resources.get_distribution('ipp-macro-series-parser').location,
    'ipp_macro_series_parser/tests/data')


def get_tidy_data(year):
    """Return the result of ``get_comptes_nationaux_data`` for ``year``."""
    return get_comptes_nationaux_data(year)

コード例 #27
0
# -*- coding: utf-8 -*-


import os
import pandas
import pkg_resources
from ipp_macro_series_parser.config import Config

from ipp_macro_series_parser.comptes_nationaux.parser_main import get_comptes_nationaux_data
from ipp_macro_series_parser.data_extraction import (
    look_many, look_up, get_or_construct_value, get_or_construct_data)
from ipp_macro_series_parser.comptes_nationaux.sheets_lists import variables_CN1, variables_CN2

# Configuration reader; the configuration files directory is the location
# reported by pkg_resources for the 'ipp-macro-series-parser' distribution.
parser = Config(
    config_files_directory = os.path.join(pkg_resources.get_distribution('ipp-macro-series-parser').location)
    )
# Values read from the 'data' section of the configuration.
cn_directory = parser.get('data', 'cn_directory')
cn_hdf = parser.get('data', 'cn_hdf_directory')
cn_csv = parser.get('data', 'cn_csv_directory')
tests_directory = parser.get('data', 'tests_directory')

# Test data directory, located relative to the distribution location.
tests_data = os.path.join(
    pkg_resources.get_distribution('ipp-macro-series-parser').location,
    'ipp_macro_series_parser/tests/data')

# NOTE: everything below runs at import time.
df = get_comptes_nationaux_data(2013)

# Values and formulas for the CN1 and CN2 variable lists over range(1949, 2014).
values_CN1, formulas_CN1 = get_or_construct_data(df, variables_CN1, range(1949, 2014))
values_CN2, formulas_CN2 = get_or_construct_data(df, variables_CN2, range(1949, 2014))
コード例 #28
0
def main():
    """Command-line entry point: validate options, then call ``run_all``.

    Options:
        -d/--download   call ``dpd.main()`` and read both input directories
                        from the 'data' section of the configuration
                        (keys 'insee_projections' and 'drees_dependance')
        -v/--verbose    log at DEBUG level instead of WARNING
        -o/--output     output directory (required; created when missing)
        -p/--pop_input  input directory for population files
                        (required unless -d is given)
        -w/--weight     integer weight forwarded to ``run_all`` (default 200)
        -t/--til_input  optional input directory for til-specific files

    Missing or conflicting options and non-existent input directories are
    reported through ``parser.error``, which prints the usage message and
    exits with status 2.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-d',
                        '--download',
                        action='store_true',
                        help="download all input files from their web sources")
    parser.add_argument('-v',
                        '--verbose',
                        action='store_true',
                        default=False,
                        help="increase output verbosity")
    parser.add_argument('-o',
                        '--output',
                        type=str,
                        default=None,
                        help="output directory")
    parser.add_argument('-p',
                        '--pop_input',
                        type=str,
                        default=None,
                        help="input directory for population files")
    # type=int makes argparse reject a non-numeric weight with a usage error
    # instead of failing later with a ValueError traceback.
    parser.add_argument('-w',
                        '--weight',
                        type=int,
                        default=200,
                        help="weight used for TIL-France")
    # TODO remove weight from here
    parser.add_argument(
        '-t',
        '--til_input',
        default=None,
        help="input directory for til-specific files (dependance)")
    args = parser.parse_args()

    logging.basicConfig(
        level=logging.DEBUG if args.verbose else logging.WARNING,
        stream=sys.stdout)

    # Validate everything before touching the file system, so that a bad
    # command line never leaves a freshly created output directory behind.
    # parser.error() exits by itself; no explicit sys.exit() is needed.
    if args.output is None:
        parser.error("-o/--output is required")
    if args.download and (args.til_input or args.pop_input):
        parser.error("-d cannot be used with -p nor -t")
    if not args.download and args.pop_input is None:
        parser.error("-p/--pop_input is required unless -d is used")

    if os.path.isabs(args.output):
        output_dir = args.output
    else:
        output_dir = os.path.abspath(args.output)

    if not os.path.exists(output_dir):
        log.info('Creating directory {}'.format(output_dir))
        os.makedirs(output_dir)

    if args.download:
        dpd.main()
        # Input directories come from the configuration once the download
        # step has run.
        config = Config()
        pop_input = config.get('data', 'insee_projections')
        til_input = config.get('data', 'drees_dependance')
    else:
        pop_input = os.path.abspath(args.pop_input)
        if not os.path.exists(pop_input):
            parser.error(
                "population input directory {} does not exist".format(pop_input))

        # til_input stays None when -t is not given.
        til_input = args.til_input
        if til_input is not None:
            til_input = os.path.abspath(til_input)
            if not os.path.exists(til_input):
                parser.error(
                    "til input directory {} does not exist".format(til_input))

    run_all(
        pop_input_dir=pop_input,
        til_input_dir=til_input,
        parameters_dir=output_dir,
        uniform_weight=args.weight,
    )
コード例 #29
0
#
# TAXIPP is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

import logging
import os
import pandas
import pkg_resources
from ipp_macro_series_parser.config import Config

# Configuration is looked up in the location of the installed
# 'ipp-macro-series-parser' distribution.
config_parser = Config(config_files_directory=os.path.join(
    pkg_resources.get_distribution('ipp-macro-series-parser').location))
# 'xls' sub-directory of the configured 'demographie_directory'.
xls_directory = os.path.join(
    config_parser.get('data', 'demographie_directory'), 'xls')

# Module-level logger named after this module.
log = logging.getLogger(__name__)


def create_demographie_data_frame():
    """Prepare the per-year parameters for the 'pyramide-des-ages' xls files.

    NOTE(review): only the beginning of this function is visible in this
    excerpt. The statements below build an empty data frame and, for each
    year, a file path and some read parameters; presumably the files are
    parsed and accumulated further down -- confirm against the full source.
    """
    data_frame = pandas.DataFrame()
    for year in range(1999, 2015 + 1):  # 1999 through 2015 inclusive
        file_path = os.path.join(xls_directory,
                                 u'pyramide-des-ages-{}.xls'.format(year))
        # The comparison counts as 0 or 1: 4 rows for 1999, 5 for other years.
        skiprows = 5 - (year == 1999)
        parse_cols = "A:E"
        slice_start = 0
        slice_end = 101
コード例 #30
0
# -*- coding: utf-8 -*-

import collections
import numpy
import pandas

from py_expression_eval import Parser

from ipp_macro_series_parser.config import Config
from ipp_macro_series_parser.denombrements_fiscaux.denombrements_parsers import (
    get_denombrements_fiscaux_data_frame)
from ipp_macro_series_parser.data_extraction import get_or_construct_value

# Module-level configuration object built with Config's default arguments.
config_parser = Config()


def update_index_by_variable_name_appearing_in_formula(index_by_variable_name,
                                                       formula):
    """Register every variable used in ``formula`` in ``index_by_variable_name``.

    The formula is parsed with ``py_expression_eval.Parser``; each variable
    name it reports is stored in the mapping as ``name -> {'code': name}``,
    overwriting any existing entry with the same key.

    Parameters
    ----------
    index_by_variable_name : dict
        Mapping updated in place.
    formula : str
        Expression to parse.

    Returns
    -------
    dict
        The same ``index_by_variable_name`` object, after the update.

    Raises
    ------
    Exception
        Whatever the parser raises on an invalid formula; the formula is
        printed first, then the exception is re-raised unchanged.
    """
    try:
        expr = Parser().parse(formula)
    except Exception:
        # Show the offending formula, then re-raise with a bare ``raise`` so
        # the original traceback is preserved.
        print(formula)
        raise
    index_by_variable_name.update(
        (variable, {'code': variable}) for variable in expr.variables())
    return index_by_variable_name
コード例 #31
0
# -*- coding: utf-8 -*-
"""
Created on Fri Jul 17 14:18:26 2015

@author: thomas.douenne
"""

import os
import pkg_resources
import urllib

from ipp_macro_series_parser.config import Config

# Configuration is looked up in the location of the installed
# 'ipp-macro-series-parser' distribution.
parser = Config(config_files_directory=os.path.join(
    pkg_resources.get_distribution('ipp-macro-series-parser').location))
# 'transports_directory' setting from the 'data' section.
transports_directory = parser.get('data', 'transports_directory')


def getunzipped(theurl, thedir, file_name):
    """Download ``theurl`` to ``thedir/file_name``.

    ``thedir`` is created when it does not exist. Despite the function's
    name, nothing is unzipped here. A failed download is reported on stdout
    and otherwise ignored (best effort).

    Parameters
    ----------
    theurl : str
        URL to retrieve.
    thedir : str
        Destination directory.
    file_name : str
        Name of the file written inside ``thedir``.

    Returns
    -------
    None
    """
    # Function-scope import so the block works on Python 3 (urllib.request)
    # as well as Python 2 (urllib).
    try:
        from urllib.request import urlretrieve
    except ImportError:  # Python 2
        from urllib import urlretrieve

    target = os.path.join(thedir, file_name)
    if not os.path.exists(thedir):
        os.makedirs(thedir)
    try:
        urlretrieve(theurl, target)
    except IOError as e:
        print("Can't retrieve %r to %r: %s" % (theurl, thedir, e))


to_be_downloaded = [
    'a-transport-et-activite-economique',
コード例 #32
0

import argparse
import logging
import os
import sys
import urllib
import urllib2

from ipp_macro_series_parser.config import Config


# Logger named after this script's file name without its extension.
app_name = os.path.splitext(os.path.basename(__file__))[0]
log = logging.getLogger(app_name)

parser = Config()
prestations_sociales_directory = parser.get('data', 'prestations_sociales_directory')
# The comparison is against the string 'None', not the None object.
assert prestations_sociales_directory != 'None', \
    "Set prestations_sociales_directory in the data section of you config[_local].ini file to a valid directory"
# 'raw' sub-directory of the configured prestations sociales directory.
prestations_sociales_raw = os.path.join(
    prestations_sociales_directory,
    'raw',
    )


def minimum_vieillesse_downloader(directory = prestations_sociales_raw):
    """Ensure ``directory`` exists and derive the CNAV statistics sub-directory.

    NOTE(review): only the beginning of this function is visible in this
    excerpt; presumably the download itself follows -- confirm against the
    full source.

    Parameters
    ----------
    directory : str
        Base directory, created when missing. Defaults to the module-level
        ``prestations_sociales_raw``.
    """
    if not os.path.exists(directory):
        log.info('Creating directory {} since it does not exist.'.format(directory))
        os.makedirs(directory)
    # http://www.statistiques-recherches.cnav.fr/le-minimum-vieillesse.html
    statistiques_recherches_cnav_fr = os.path.join(directory, 'statistiques_recherches_cnav_fr')
コード例 #33
0
#! /usr/bin/env python
# -*- coding: utf-8 -*-

from __future__ import unicode_literals
import logging
import os
import platform
import sys
import pandas as pd

from ipp_macro_series_parser.config import Config

# Logger named after this script's file name without its extension.
app_name = os.path.splitext(os.path.basename(__file__))[0]
log = logging.getLogger(app_name)

parser = Config()
# The source setting is decoded from UTF-8 after being read.
# NOTE(review): .decode() on the returned value implies a Python 2 byte
# string -- confirm before running on Python 3.
prelevements_sociaux_source = parser.get(
    'data', 'prelevements_sociaux_source').decode('utf-8')
prelevements_sociaux_directory = parser.get('data',
                                            'prelevements_sociaux_directory')

# Both comparisons are against the string 'None', not the None object.
assert prelevements_sociaux_source != 'None', \
    "Set prelevements_sociaux_source in the data section of you config[_local].ini file to a valid directory"
assert prelevements_sociaux_directory != 'None', \
    "Set prelevements_sociaux_directory in the data section of you config[_local].ini file to a valid directory"


def prelevements_sociaux_downloader():
    """Define the two sheet names used for the prelevements sociaux data.

    NOTE(review): only the beginning of this function is visible in this
    excerpt; how the sheet names are used is not shown here.
    """

    sheetname1 = 'CSG-CRDS (V&M)'
    sheetname2 = 'Recettes CSG (CCSS)'
コード例 #34
0
import argparse
import logging
import os
import pkg_resources
import shutil
import sys
import urllib
import zipfile

from ipp_macro_series_parser.config import Config

# Logger named after this script's file name without its extension.
app_name = os.path.splitext(os.path.basename(__file__))[0]
log = logging.getLogger(app_name)

# Configuration is looked up in the location of the installed
# 'ipp-macro-series-parser' distribution.
parser = Config(config_files_directory=os.path.join(pkg_resources.get_distribution("ipp-macro-series-parser").location))
cn_directory = parser.get("data", "cn_directory")
# The comparison is against the string "None", not the None object.
assert cn_directory != "None", "Set cn_directory in the data section of your config_local.ini file to a valid directory"


# Download a zip file from theurl and unzip it in directory thedir
def getunzipped(url=None, directory=None):
    """Download ``url`` to ``directory/source_insee.zip``.

    NOTE(review): only the beginning of this function is visible in this
    excerpt; presumably the archive is unzipped afterwards -- confirm
    against the full source.

    Parameters
    ----------
    url : str
        URL of the zip file; must be truthy.
    directory : str
        Destination directory, created when missing; must be truthy.
    """
    assert url and directory
    name = os.path.join(directory, "source_insee.zip")
    if not os.path.exists(directory):
        os.makedirs(directory)
    try:
        log.info("Downloading {}/{}".format(url, name))
        name, hdrs = urllib.urlretrieve(url, name)
    # NOTE(review): Python 2-only ``except`` syntax.
    except IOError, e:
        # A failed download is only logged at INFO level.
        log.info("Can't retrieve %r to %r: %s" % (url, directory, e))
コード例 #35
0
import os
import pandas
import pkg_resources
import numpy


from ipp_macro_series_parser.config import Config
from ipp_macro_series_parser.comptes_nationaux.parser_tee import tee_df_by_year_generator
from ipp_macro_series_parser.comptes_nationaux.parser_non_tee import non_tee_df_by_filename_generator


# Module-level logger named after this module.
# NOTE(review): no ``import logging`` is visible in this excerpt -- confirm
# it is imported in the full source.
log = logging.getLogger(__name__)


# Configuration is looked up in the location of the installed
# 'ipp-macro-series-parser' distribution.
parser = Config(
    config_files_directory = os.path.join(pkg_resources.get_distribution('ipp-macro-series-parser').location)
    )
# 'cn_hdf_directory' setting from the 'data' section.
hdf_directory = parser.get('data', 'cn_hdf_directory')


def cn_df_generator(year, list_years = None, drop_duplicates = True, subset = None):
    """
    Generates the table with all the data from Comptabilite Nationale.

    Parameters
    ----------
    year : int
        year of INSEE data realease
    list_years : list of integers
        list of years of interest. Optional.
コード例 #36
0
# -*- coding: utf-8 -*-
"""
Created on Tue Jul 21 13:57:18 2015

@author: thomas.douenne
"""

import os
import pkg_resources
import pandas as pd

from ipp_macro_series_parser.config import Config

# Configuration is looked up in the location of the installed
# 'ipp-macro-series-parser' distribution.
parser = Config(config_files_directory=os.path.join(
    pkg_resources.get_distribution('ipp-macro-series-parser').location))

# Settings read from the 'data' section of the configuration.
transports_directory = parser.get('data', 'transports_directory')
prix_annuel_carburants = parser.get('data', 'prix_annuel_carburants_90_2014')
prix_mensuel_carburants = parser.get('data', 'prix_mensuel_carburants_90_2015')


def prix_carburants_parser(excelfile_name):
    data_frame = pd.read_excel(excelfile_name, header=2)
    data_frame = data_frame.dropna(how='all')
    data_frame['Date'] = data_frame['Date'].astype(str)
    data_frame.fillna('   ', inplace=True)
    data_frame = data_frame[data_frame.ix[:, 1] != '   ']
    data_frame = data_frame[data_frame.ix[:, 0] != 'en euro par litre']
    data_frame.rename(columns={'Super carburant': 'super_plombe_ht'},
                      inplace=True)
    data_frame.rename(columns={'Super carburant.1': 'super_plombe_ttc'},
コード例 #37
0
#! /usr/bin/env python
# -*- coding: utf-8 -*-

import logging
import pandas as pd
import os
from slugify import slugify
"""Parse dépenses and bénéficiaires of prestataions sociales to produce the dataframe stored in a HDF5 file or csv files
"""

from ipp_macro_series_parser.config import Config

# Module-level logger named after this module.
log = logging.getLogger(__name__)

parser = Config()
# 'prestations_sociales_directory' setting from the 'data' section.
prestations_sociales_directory = parser.get('data',
                                            'prestations_sociales_directory')


def build_data_frame(section):
    """Locate the raw CAF files for ``section`` and pick their file prefix.

    NOTE(review): only the beginning of this function is visible in this
    excerpt; presumably the files are then read into a data frame --
    confirm against the full source.

    Parameters
    ----------
    section : str
        Either 'beneficiaires' or 'depenses'; anything else fails the
        assertion below.
    """
    assert section in ['beneficiaires', 'depenses']

    # <prestations_sociales_directory>/raw/caf_data_fr/les-<section>-tous-regimes-...
    directory = os.path.join(
        prestations_sociales_directory,
        'raw',
        'caf_data_fr',
        'les-{}-tous-regimes-de-prestations-familiales-et-sociales'.format(
            section),
    )
    # 'DepTR' for 'depenses', 'BenTR' for 'beneficiaires'.
    prefix = 'DepTR' if section == 'depenses' else 'BenTR'
コード例 #38
0
# -*- coding: utf-8 -*-

import os
import pandas
import pkg_resources
from ipp_macro_series_parser.config import Config
from pandas.util.testing import assert_frame_equal

from ipp_macro_series_parser.comptes_nationaux.parser_main import get_comptes_nationaux_data
from ipp_macro_series_parser.data_extraction import (look_many, look_up,
                                                     get_or_construct_value,
                                                     get_or_construct_data)
from ipp_macro_series_parser.comptes_nationaux.sheets_lists import generate_CN1_variables, generate_CN2_variables

parser = Config()
# Directory settings read from the 'data' section of the configuration.
cn_directory = parser.get('data', 'cn_directory')
cn_hdf = parser.get('data', 'cn_hdf_directory')
cn_csv = parser.get('data', 'cn_csv_directory')
tests_directory = parser.get('data', 'tests_directory')

# Path to 'ipp_macro_series_parser/tests/data' under the location of the
# installed 'ipp-macro-series-parser' distribution.
tests_data = os.path.join(
    pkg_resources.get_distribution('ipp-macro-series-parser').location,
    'ipp_macro_series_parser/tests/data')


def get_tidy_data(year):
    """Return the result of ``get_comptes_nationaux_data`` for ``year``.

    Thin wrapper: ``year`` is forwarded as-is and the result is handed back
    unchanged.
    """
    return get_comptes_nationaux_data(year)


# INPUTS
コード例 #39
0
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.


import os
import pandas
import pkg_resources


from ipp_macro_series_parser.config import Config
from ipp_macro_series_parser.denombrements_fiscaux.agregats_ipp import build_irpp_tables

# Configuration is looked up in the location of the installed
# 'ipp-macro-series-parser' distribution.
config_parser = Config(
    config_files_directory=os.path.join(pkg_resources.get_distribution("ipp-macro-series-parser").location)
)
# 'denombrements_fiscaux_xls' setting from the 'data' section.
xls_directory = config_parser.get("data", "denombrements_fiscaux_xls")
# Workbook path inside that directory, and a sheet name.
file_path = os.path.join(xls_directory, u"Agrégats IPP - Données fiscales.xls")
sheetname = "calculs calage"


def error_msg(irpp_table_name, variable, year, target, actual):
    """Format a two-line mismatch report for one variable of one table.

    The returned text starts and ends with a newline. Its first line names
    the table, the year and the variable; its second line gives the expected
    value (``target``) followed by the value found (``actual``).
    """
    location_line = "In table {} on year {}, error on variable {}:".format(
        irpp_table_name, year, variable)
    values_line = "should be {} instead of {}".format(target, actual)
    return "\n" + location_line + "\n" + values_line + "\n"
コード例 #40
0
import logging
import os
import pkg_resources
import sys
import urllib


from ipp_macro_series_parser.config import Config


# Logger named after this script's file name without its extension.
app_name = os.path.splitext(os.path.basename(__file__))[0]
log = logging.getLogger(app_name)


# Configuration is looked up in the location of the installed
# 'ipp-macro-series-parser' distribution.
parser = Config(
    config_files_directory = os.path.join(pkg_resources.get_distribution('ipp-macro-series-parser').location)
    )
demographie_directory = parser.get('data', 'demographie_directory')
# The comparison is against the string 'None', not the None object.
assert demographie_directory != 'None', \
    "Set demographie_directory_directory in the data section of you config[_local].ini file to a valid directory"


# Download the xls file from url and unzip it in the given directory
def demographie_downloader(years = None, directory = demographie_directory):
    assert years is not None
    if type(years) is int:
        years = [years]
    if not os.path.exists(directory):
        os.makedirs(directory)

    base_url = 'http://www.insee.fr/fr/ppp/bases-de-donnees/donnees-detaillees/bilan-demo/fichiers-xls/'