def main():
    """Command-line entry point: parse options, validate config, run the parser.

    Side effects: configures root logging, creates the ``clean`` output
    directory if needed, and aborts (with an error log) when output CSV
    files already exist and ``--force`` was not given.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-v', '--verbose', action='store_true', default=False,
        help="increase output verbosity")
    parser.add_argument('-f', '--force', action='store_true', default=False,
        help="Force overwrite of existing data")
    args = parser.parse_args()
    logging.basicConfig(level=logging.DEBUG if args.verbose else logging.WARNING, stream=sys.stdout)
    config = Config()
    prelevements_sociaux_source = config.get('data', 'prelevements_sociaux_source')
    prelevements_sociaux_directory = config.get('data', 'prelevements_sociaux_directory')
    # Config.get returns the *string* 'None' (not the None object) when unset.
    assert prelevements_sociaux_source != 'None', \
        "Set prelevements_sociaux_source in the data section of your config[_local].ini file to a valid directory"
    assert prelevements_sociaux_directory != 'None', \
        "Set prelevements_sociaux_directory in the data section of your config[_local].ini file to a valid directory"
    clean_directory = os.path.join(prelevements_sociaux_directory, 'clean')
    if not os.path.exists(clean_directory):
        os.makedirs(clean_directory)
    file_path1 = os.path.join(clean_directory, 'recette_csg_crds.csv')
    file_path2 = os.path.join(clean_directory, 'recette_csg_by_type.csv')
    if os.path.exists(file_path1) or os.path.exists(file_path2):
        if not args.force:
            # Abort instead of silently overwriting existing outputs.
            log.error("The files {} and/or {} already exist. Use the --force to overwrite.".format(file_path1, file_path2))
            return
    main_parse()
def load_actual_data(year=None):
    """Load actual social-contribution and benefit series for *year*.

    Returns a DataFrame with one row per series and two columns:
    ``actual_amount`` and ``actual_beneficiaries`` (amounts divided by 1e6).
    """
    assert year is not None
    parser = Config()
    # CSG/CRDS receipts are optional inputs: keep None placeholders so a
    # missing or unreadable CSV degrades gracefully instead of crashing.
    assiette_csg_by_type_amounts = None
    csg_by_type_amounts = None
    csg_crds_amounts = None
    try:
        directory = os.path.join(
            parser.get('data', 'prelevements_sociaux_directory'),
            'clean',
            )
        csg_crds_amounts = pd.read_csv(
            os.path.join(directory, 'recette_csg_crds.csv'),
            index_col=0).rename(dict(
                recette_csg='csg',
                recette_crds='crds',
                )) / 1e6
        csg_by_type_amounts = pd.read_csv(
            os.path.join(directory, 'recette_csg_by_type.csv'),
            index_col=0,
            ).drop(['source']).astype(float) / 1e6
        assiette_csg_by_type_amounts = pd.read_csv(
            os.path.join(directory, 'assiette_csg_by_type.csv'),
            index_col=0,
            ) / 1e6
    except Exception:
        # Narrowed from a bare `except:` that also swallowed SystemExit and
        # KeyboardInterrupt; the best-effort fallback behaviour is preserved.
        assiette_csg_by_type_amounts = None
        csg_by_type_amounts = None
        csg_crds_amounts = None
    # Prestations sociales (required inputs)
    directory = os.path.join(
        parser.get('data', 'prestations_sociales_directory'),
        'clean',
        )
    amounts_csv = os.path.join(directory, 'historique_depenses.csv')
    beneficiaries_csv = os.path.join(directory, 'historique_beneficiaires.csv')
    prestations_sociales_amounts = pd.read_csv(amounts_csv, index_col=0)
    prestations_sociales_beneficiaries = pd.read_csv(beneficiaries_csv, index_col=0)
    # Minimum vieillesse beneficiaries: optional. Initialise to None so the
    # concat below cannot raise NameError when the CSV is absent
    # (pd.concat silently drops None entries).
    minimum_vieillesse_beneficiaries = None
    minimum_vieillesse_beneficiaries_csv = os.path.join(
        directory, 'historique_beneficiaires_minimum_vieillesse.csv')
    if os.path.exists(minimum_vieillesse_beneficiaries_csv):
        minimum_vieillesse_beneficiaries = pd.read_csv(
            minimum_vieillesse_beneficiaries_csv, index_col=0)
    amounts = pd.concat([
        assiette_csg_by_type_amounts,
        csg_by_type_amounts,
        csg_crds_amounts,
        prestations_sociales_amounts,
        ])
    beneficiaries = pd.concat(
        [minimum_vieillesse_beneficiaries, prestations_sociales_beneficiaries])
    return pd.DataFrame(
        data={
            "actual_amount": amounts[str(year)],
            "actual_beneficiaries": beneficiaries[str(year)],
            })
def main():
    """Command-line entry point: build prestations sociales data frames.

    Aborts (with an error log) when the target HDF5 file already exists
    and ``--force`` was not given.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-v', '--verbose', action='store_true', default=False,
        help="increase output verbosity")
    parser.add_argument('-f', '--force', action='store_true', default=False,
        help="Force overwrite of existing data")
    args = parser.parse_args()
    logging.basicConfig(
        level=logging.DEBUG if args.verbose else logging.WARNING,
        stream=sys.stdout)
    config = Config()
    directory = config.get('data', 'prestations_sociales_directory')
    # Config.get returns the *string* 'None' (not the None object) when unset.
    assert directory != 'None', \
        "Set prestations_sociales_directory in the data section of your config[_local].ini file to a valid directory"
    hdf_file_path = os.path.join(directory, 'prestations_sociales.h5')
    if os.path.exists(hdf_file_path):
        if not args.force:
            log.error(
                "The file {} already exists. Use the --force to overwrite.".
                format(hdf_file_path))
            return
    create_prestations_sociales_data_frames()
def main():
    """Download the foyers imposables/imposés workbook into the target directory.

    The target defaults to the ``denombrements_fiscaux_xls`` directory from
    the configuration and may be overridden with ``--target``.
    """
    config = Config()
    xls_directory = config.get("data", "denombrements_fiscaux_xls")
    # Config.get yields the string "None" when the key is not set.
    assert (
        xls_directory != "None"
    ), "Set denombrements_fiscaux_xls in the data section of your config_local.ini file to a valid directory"
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        "-t",
        "--target",
        default=xls_directory,
        help="path where to store downloaded files (default to {})".format(xls_directory),
        )
    arg_parser.add_argument(
        "-v",
        "--verbose",
        action="store_true",
        default=False,
        help="increase output verbosity",
        )
    args = arg_parser.parse_args()
    log_level = logging.DEBUG if args.verbose else logging.WARNING
    logging.basicConfig(level=log_level, stream=sys.stdout)
    build_excel(os.path.join(args.target, "foyers_imposables_imposes.xls"))
def main():
    """Command-line entry point: build prestations sociales data frames.

    Aborts (with an error log) when the target HDF5 file already exists
    and ``--force`` was not given.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-v', '--verbose', action='store_true', default=False,
        help="increase output verbosity")
    parser.add_argument('-f', '--force', action='store_true', default=False,
        help="Force overwrite of existing data")
    args = parser.parse_args()
    logging.basicConfig(level=logging.DEBUG if args.verbose else logging.WARNING, stream=sys.stdout)
    config = Config()
    directory = config.get('data', 'prestations_sociales_directory')
    # Config.get returns the *string* 'None' (not the None object) when unset.
    assert directory != 'None', \
        "Set prestations_sociales_directory in the data section of your config[_local].ini file to a valid directory"
    hdf_file_path = os.path.join(directory, 'prestations_sociales.h5')
    if os.path.exists(hdf_file_path):
        if not args.force:
            log.error("The file {} already exists. Use the --force to overwrite.".format(hdf_file_path))
            return
    create_prestations_sociales_data_frames()
# -*- coding: utf-8 -*-

import os
import urllib

# This module uses the Python 3 print() function, but `urllib.urlretrieve`
# only exists on Python 2 — on Python 3 it moved to urllib.request.
# Resolve a cross-version `urlretrieve` once at import time.
try:
    from urllib.request import urlretrieve  # Python 3
except ImportError:
    from urllib import urlretrieve  # Python 2

from ipp_macro_series_parser.config import Config

parser = Config()
transports_directory = parser.get('data', 'transports_directory')
assert os.path.exists(
    transports_directory), "{} is not a valid directory".format(
        transports_directory)


def getunzipped(theurl, thedir, file_name):
    """Download *theurl* into ``thedir/file_name``, creating *thedir* if needed.

    Errors are reported on stdout (best effort); nothing is returned.
    """
    name = os.path.join(thedir, file_name)
    if not os.path.exists(thedir):
        os.makedirs(thedir)
    try:
        name, hdrs = urlretrieve(theurl, name)
    except IOError as e:
        print("Can't retrieve %r to %r: %s" % (theurl, thedir, e))
    return


# Dataset identifiers to fetch from the transport statistics portal.
to_be_downloaded = [
    'a-transport-et-activite-economique',
    'b-entreprises-francaises-de-transport',
    'c-transport-emploi-remuneration',
    'd-transport-developpement-durable',
    'e-transport-de-marchandises',
    'f-transport-de-voyageurs',
    'g-bilan-de-circulation',
    ]
"""Download http://www.insee.fr/fr/ppp/bases-de-donnees/donnees-detaillees/bilan-demo/fichiers-xls/ """ import argparse import logging import os import sys import urllib from ipp_macro_series_parser.config import Config app_name = os.path.splitext(os.path.basename(__file__))[0] log = logging.getLogger(app_name) parser = Config() demographie_directory = parser.get('data', 'demographie_directory') assert demographie_directory != 'None', \ "Set demographie_directory in the data section of you config[_local].ini file to a valid directory" # Download a the xls file from url and unzipp it in directory def age_structure_downloader(years=None, directory=demographie_directory): assert years is not None if type(years) is int: years = [years] if not os.path.exists(directory): os.makedirs(directory) base_url = 'http://www.insee.fr/fr/ppp/bases-de-donnees/donnees-detaillees/bilan-demo/fichiers-xls/' for year in years:
# -*- coding: utf-8 -*- import logging import numpy import os import pandas import pkg_resources import re from ipp_macro_series_parser.config import Config config_parser = Config() xls_directory = config_parser.get('data', 'denombrements_fiscaux_xls') hdf_directory = config_parser.get('data', 'denombrements_fiscaux_hdf') log = logging.getLogger(__name__) def parse_ipp_denombrements(): file_path = os.path.join(xls_directory, u'Agrégats IPP - Données fiscales.xls') def parse_bloc(name = None, sheetname = '2042-montant', skiprows = 0, parse_cols = None, slice_start = None, slice_end = None, prefix = ''): assert name is not None df = pandas.read_excel( file_path,
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU Affero General Public License for more details. # # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. import logging import os import pandas import pkg_resources from ipp_macro_series_parser.config import Config config_parser = Config(config_files_directory=os.path.join( pkg_resources.get_distribution('ipp-macro-series-parser').location)) xls_directory = os.path.join( config_parser.get('data', 'demographie_directory'), 'xls') log = logging.getLogger(__name__) def create_demographie_data_frame(): data_frame = pandas.DataFrame() for year in range(1999, 2015 + 1): file_path = os.path.join(xls_directory, u'pyramide-des-ages-{}.xls'.format(year)) skiprows = 5 - (year == 1999) parse_cols = "A:E" slice_start = 0 slice_end = 101 sheetname = 'France'
# -*- coding: utf-8 -*- import os import pandas import pkg_resources from ipp_macro_series_parser.config import Config from ipp_macro_series_parser.comptes_nationaux.parser_main import get_comptes_nationaux_data from ipp_macro_series_parser.data_extraction import ( look_many, look_up, get_or_construct_value, get_or_construct_data) from ipp_macro_series_parser.comptes_nationaux.sheets_lists import variables_CN1, variables_CN2 parser = Config( config_files_directory = os.path.join(pkg_resources.get_distribution('ipp-macro-series-parser').location) ) cn_directory = parser.get('data', 'cn_directory') cn_hdf = parser.get('data', 'cn_hdf_directory') cn_csv = parser.get('data', 'cn_csv_directory') tests_directory = parser.get('data', 'tests_directory') tests_data = os.path.join( pkg_resources.get_distribution('ipp-macro-series-parser').location, 'ipp_macro_series_parser/tests/data') df = get_comptes_nationaux_data(2013) values_CN1, formulas_CN1 = get_or_construct_data(df, variables_CN1, range(1949, 2014)) values_CN2, formulas_CN2 = get_or_construct_data(df, variables_CN2, range(1949, 2014))
def load_actual_data(year=None):
    """Load actual social-contribution and benefit series for *year*.

    Returns a DataFrame with one row per series and two columns:
    ``actual_amount`` and ``actual_beneficiaries`` (amounts divided by 1e6).
    """
    assert year is not None
    parser = Config()
    # CSG/CRDS receipts are optional inputs: keep None placeholders so a
    # missing or unreadable CSV degrades gracefully instead of crashing.
    assiette_csg_by_type_amounts = None
    csg_by_type_amounts = None
    csg_crds_amounts = None
    try:
        directory = os.path.join(
            parser.get('data', 'prelevements_sociaux_directory'),
            'clean',
            )
        csg_crds_amounts = pd.read_csv(
            os.path.join(directory, 'recette_csg_crds.csv'),
            index_col=0,
            ).rename(
                dict(
                    recette_csg='csg',
                    recette_crds='crds',
                    )
                ) / 1e6
        csg_by_type_amounts = pd.read_csv(
            os.path.join(directory, 'recette_csg_by_type.csv'),
            index_col=0,
            ).drop(
                ['source']
                ).astype(float) / 1e6
        assiette_csg_by_type_amounts = pd.read_csv(
            os.path.join(directory, 'assiette_csg_by_type.csv'),
            index_col=0,
            ) / 1e6
    except Exception:
        # Narrowed from a bare `except:` that also swallowed SystemExit and
        # KeyboardInterrupt; the best-effort fallback behaviour is preserved.
        assiette_csg_by_type_amounts = None
        csg_by_type_amounts = None
        csg_crds_amounts = None
    # Prestations sociales (required inputs)
    directory = os.path.join(
        parser.get('data', 'prestations_sociales_directory'),
        'clean',
        )
    amounts_csv = os.path.join(directory, 'historique_depenses.csv')
    beneficiaries_csv = os.path.join(directory, 'historique_beneficiaires.csv')
    prestations_sociales_amounts = pd.read_csv(amounts_csv, index_col=0)
    prestations_sociales_beneficiaries = pd.read_csv(beneficiaries_csv, index_col=0)
    # Minimum vieillesse beneficiaries: optional. Initialise to None so the
    # concat below cannot raise NameError when the CSV is absent
    # (pd.concat silently drops None entries).
    minimum_vieillesse_beneficiaries = None
    minimum_vieillesse_beneficiaries_csv = os.path.join(
        directory, 'historique_beneficiaires_minimum_vieillesse.csv')
    if os.path.exists(minimum_vieillesse_beneficiaries_csv):
        minimum_vieillesse_beneficiaries = pd.read_csv(minimum_vieillesse_beneficiaries_csv, index_col=0)
    amounts = pd.concat(
        [
            assiette_csg_by_type_amounts,
            csg_by_type_amounts,
            csg_crds_amounts,
            prestations_sociales_amounts,
            ],
        sort=True,
        )
    beneficiaries = pd.concat(
        [minimum_vieillesse_beneficiaries, prestations_sociales_beneficiaries],
        sort=True,
        )
    return pd.DataFrame(data={
        "actual_amount": amounts[str(year)],
        "actual_beneficiaries": beneficiaries[str(year)],
        })
Created on Fri Jul 17 14:18:26 2015 @author: thomas.douenne """ import os import pkg_resources import urllib from ipp_macro_series_parser.config import Config parser = Config( config_files_directory = os.path.join(pkg_resources.get_distribution('ipp-macro-series-parser').location) ) transports_directory = parser.get('data', 'transports_directory') def getunzipped(theurl, thedir, file_name): name = os.path.join(thedir, file_name) if not os.path.exists(thedir): os.makedirs(thedir) try: name, hdrs = urllib.urlretrieve(theurl, name) except IOError, e: print "Can't retrieve %r to %r: %s" % (theurl, thedir, e) return to_be_downloaded = ['a-transport-activite-economique', 'b-entreprises', 'c-transport-emploi-remuneration', 'd-transport-developpement-durable', 'e-transport-marchandises', 'f-transports-voyageurs-b', 'g-bilan-circulation']
import pandas import os import pkg_resources from pandas.util.testing import assert_frame_equal from ipp_macro_series_parser.comptes_nationaux import parser_tee from ipp_macro_series_parser.comptes_nationaux import parser_non_tee from ipp_macro_series_parser.comptes_nationaux.parser_main import get_comptes_nationaux_data from ipp_macro_series_parser.data_extraction import get_or_construct_value, get_or_construct_data from ipp_macro_series_parser.comptes_nationaux.sheets_lists import generate_CN1_variables from ipp_macro_series_parser.comptes_nationaux.cn_test import read_CN1, read_profits_societes, create_dict_profits from ipp_macro_series_parser.config import Config parser = Config() cn_csv = parser.get('data', 'cn_csv_directory') tests_data = os.path.join( pkg_resources.get_distribution('ipp-macro-series-parser').location, 'ipp_macro_series_parser/tests/data') def test_duplicate_tee_df(): folder_year = 2013 tee_df_by_year = parser_tee.tee_df_by_year_generator(folder_year) for key, df in tee_df_by_year.items(): for element in df.duplicated(): assert element == 0, "There are duplicate rows in TEE " + key + ", in folder: comptes_annees " + folder_year def test_duplicate_non_tee_df(): folder_year = 2013
# -*- coding: utf-8 -*- import os import pkg_resources import pandas as pd import numpy as np from ipp_macro_series_parser.config import Config parser = Config() transports_directory = parser.get('data', 'transports_directory') a_activite_economique = parser.get('data', 'a_activite_economique') d_developpement_durable = parser.get('data', 'd_developpement_durable') f_voyageurs = parser.get('data', 'f_voyageurs') g_bilan_circulation = parser.get('data', 'g_bilan_circulation') def transports_parser(excelfile_name, onglet): data_frame = pd.read_excel(excelfile_name, sheetname=onglet, skiprows=2) data_frame.rename(columns={'Unnamed: 0': 'index'}, inplace=True) data_frame = data_frame.dropna(thresh=3) data_frame.fillna('-', inplace=True) return data_frame def transports_parser_categ(excelfile_name, onglet): data_frame = pd.read_excel(excelfile_name, sheetname=onglet, skiprows=2) data_frame.rename(columns={'Unnamed: 0': 'index'}, inplace=True) data_frame['categorie'] = np.nan data_frame.loc[data_frame[2005].isnull(), 'categorie'] = \
# -*- coding: utf-8 -*- import logging import os import pandas import pkg_resources from ipp_macro_series_parser.config import Config config_parser = Config() xls_directory = os.path.join(config_parser.get('data', 'demographie_directory'), 'xls') log = logging.getLogger(__name__) def create_demographie_data_frame(): data_frame = pandas.DataFrame() for year in range(1999, 2015 + 1): file_path = os.path.join(xls_directory, u'pyramide-des-ages-{}.xls'.format(year)) skiprows = 5 - (year == 1999) parse_cols = "A:E" slice_start = 0 slice_end = 101 sheetname = 'France' if year <= 2010: sheetnames = ['France', u'France métropolitaine'] elif year == 2011: sheetnames = ['{} France'.format(year), u"{} métropole".format(year)] else:
for gender, dataframe in data_by_gender.items(): dataframe.index.name = 'age' dataframe.columns.name = 'period' dataframe = dataframe.stack('period').reset_index() dataframe[ 'sexe'] = False if gender == 'male' else True # homme = False, femme = True dataframe.rename(columns={0: 'value'}, inplace=True) dataframe = dataframe.set_index(['period', 'sexe', 'age']) assert len(dataframe.columns) == 1 dataframes.append(dataframe) return pd.concat(dataframes).sort_index() if __name__ == '__main__': config = Config() insee_projections_directory = config.get('data', 'insee_projections') insee_2070_projections_filename_by_hypothese = { 'centrale': 'Proj_démo_INSEE_2016_Hypothèse_centrale.xls', 'jeune': 'Proj_démo_INSEE_2016_Population_jeune.xls', 'vieille': 'Proj_démo_INSEE_2016_Population_vieille.xls', } for hypothese, filename in insee_2070_projections_filename_by_hypothese.items( ): input_file_path = os.path.join(insee_projections_directory, filename) output_path = os.path.join('/home/benbel/temp', hypothese, 'population.csv') df = build_population(input_file_path=input_file_path) check_directory_existence(os.path.dirname(output_path)) df.to_csv(output_path)
"""Parse dénombrements fiscaux to produce the dataframe stroed in a HDF5 file """ import argparse import logging import os import sys from ipp_macro_series_parser.config import Config from ipp_macro_series_parser.denombrements_fiscaux.denombrements_parsers import ( create_denombrements_fiscaux_data_frame) app_name = os.path.splitext(os.path.basename(__file__))[0] log = logging.getLogger(app_name) parser = Config() denombrements_fiscaux_xls_directory = parser.get('data', 'denombrements_fiscaux_xls') denombrements_fiscaux_hdf_directory = parser.get('data', 'denombrements_fiscaux_hdf') assert denombrements_fiscaux_xls_directory != 'None', \ "Set denombrements_fiscaux_xls in the data section of you config[_local].ini file to a valid directory" assert os.path.exists(os.path.join(denombrements_fiscaux_xls_directory, 'D2042Nat')), \ "The D2042Nat containing the DGFiP files doesn't exist" assert os.path.exists(os.path.join(denombrements_fiscaux_xls_directory, '2042_national.xls')), \ "The 2042_national.xls containing the openfisca data doesn't exist" assert os.path.exists(os.path.join(denombrements_fiscaux_xls_directory, 'Agrégats IPP - Données fiscales.xls')), \ "Agrégats IPP - Données fiscales.xls containing the openfisca data doesn't exist" def main():
# -*- coding: utf-8 -*- """ Created on Tue Jul 21 13:57:18 2015 @author: thomas.douenne """ import pandas as pd from ipp_macro_series_parser.config import Config parser = Config() transports_directory = parser.get('data', 'transports_directory') prix_annuel_carburants = parser.get('data', 'prix_annuel_carburants_90_2014') prix_mensuel_carburants = parser.get('data', 'prix_mensuel_carburants_90_2015') def prix_carburants_parser(excelfile_name): data_frame = pd.read_excel(excelfile_name, header = 2) data_frame = data_frame.dropna(how = 'all') data_frame['Date'] = data_frame['Date'].astype(str) data_frame.fillna(' ', inplace = True) data_frame = data_frame[data_frame.ix[:, 1] != ' '] data_frame = data_frame[data_frame.ix[:, 0] != 'en euro par litre'] data_frame.rename(columns = {'Super carburant': 'super_plombe_ht'}, inplace = True) data_frame.rename(columns = {'Super carburant.1': 'super_plombe_ttc'}, inplace = True) data_frame.rename(columns = {'Gazole': 'diesel_ht'}, inplace = True) data_frame.rename(columns = {'Gazole.1': 'diesel_ttc'}, inplace = True) data_frame.rename(columns = {'Super SP95': 'super_95_ht'}, inplace = True) data_frame.rename(columns = {'Super SP95.1': 'super_95_ttc'}, inplace = True)
import argparse import logging import os import sys import urllib import urllib2 from ipp_macro_series_parser.config import Config app_name = os.path.splitext(os.path.basename(__file__))[0] log = logging.getLogger(app_name) parser = Config() prestations_sociales_directory = parser.get('data', 'prestations_sociales_directory') assert prestations_sociales_directory != 'None', \ "Set prestations_sociales_directory in the data section of you config[_local].ini file to a valid directory" prestations_sociales_raw = os.path.join( prestations_sociales_directory, 'raw', ) def minimum_vieillesse_downloader(directory = prestations_sociales_raw): if not os.path.exists(directory): log.info('Creating directory {} since it does not exist.'.format(directory)) os.makedirs(directory) # http://www.statistiques-recherches.cnav.fr/le-minimum-vieillesse.html statistiques_recherches_cnav_fr = os.path.join(directory, 'statistiques_recherches_cnav_fr') if not os.path.exists(statistiques_recherches_cnav_fr):
@author: thomas.douenne """ import os import pkg_resources import pandas as pd import numpy as np from ipp_macro_series_parser.config import Config parser = Config( config_files_directory = os.path.join(pkg_resources.get_distribution('ipp-macro-series-parser').location) ) transports_directory = parser.get('data', 'transports_directory') a_activite_economique = parser.get('data', 'a_activite_economique') d_developpement_durable = parser.get('data', 'd_developpement_durable') f_voyageurs = parser.get('data', 'f_voyageurs') g_bilan_circulation = parser.get('data', 'g_bilan_circulation') def transports_parser(excelfile_name, onglet): data_frame = pd.read_excel(excelfile_name, sheetname = onglet, skiprows = 2) data_frame.rename(columns = {'Unnamed: 0': 'index'}, inplace = True) data_frame = data_frame.dropna(thresh = 3) data_frame.fillna('-', inplace = True) return data_frame def transports_parser_categ(excelfile_name, onglet):
import argparse
import logging
import os
import pkg_resources
import shutil
import sys
import urllib
import zipfile

from ipp_macro_series_parser.config import Config

app_name = os.path.splitext(os.path.basename(__file__))[0]
log = logging.getLogger(app_name)

parser = Config(config_files_directory=os.path.join(pkg_resources.get_distribution("ipp-macro-series-parser").location))
cn_directory = parser.get("data", "cn_directory")
# Config.get yields the string "None" when the key is not set.
assert cn_directory != "None", "Set cn_directory in the data section of your config_local.ini file to a valid directory"


# Download a zip file from theurl and unzip it in directory thedir
def getunzipped(url=None, directory=None):
    """Download *url* as ``source_insee.zip`` under *directory* (created if needed).

    Errors are logged (best effort); nothing is returned.
    """
    assert url and directory
    name = os.path.join(directory, "source_insee.zip")
    if not os.path.exists(directory):
        os.makedirs(directory)
    try:
        log.info("Downloading {}/{}".format(url, name))
        # NOTE(review): urllib.urlretrieve is Python 2 only; on Python 3 this
        # should become urllib.request.urlretrieve — confirm target runtime.
        name, hdrs = urllib.urlretrieve(url, name)
    except IOError as e:  # was Python 2-only "except IOError, e" syntax
        log.info("Can't retrieve %r to %r: %s" % (url, directory, e))
    return
# -*- coding: utf-8 -*- import logging import os import pandas import numpy from ipp_macro_series_parser.config import Config from ipp_macro_series_parser.comptes_nationaux.parser_tee import tee_df_by_year_generator from ipp_macro_series_parser.comptes_nationaux.parser_non_tee import non_tee_df_by_filename_generator log = logging.getLogger(__name__) parser = Config() hdf_directory = parser.get('data', 'cn_hdf_directory') def cn_df_generator(year, list_years=None, drop_duplicates=True, subset=None): """ Generates the table with all the data from Comptabilite Nationale. Parameters ---------- year : int year of INSEE data realease list_years : list of integers list of years of interest. Optional. Example -------- >>> year = 2013
""" Created on Tue Jul 21 13:57:18 2015 @author: thomas.douenne """ import os import pkg_resources import pandas as pd from ipp_macro_series_parser.config import Config parser = Config(config_files_directory=os.path.join( pkg_resources.get_distribution('ipp-macro-series-parser').location)) transports_directory = parser.get('data', 'transports_directory') prix_annuel_carburants = parser.get('data', 'prix_annuel_carburants_90_2014') prix_mensuel_carburants = parser.get('data', 'prix_mensuel_carburants_90_2015') def prix_carburants_parser(excelfile_name): data_frame = pd.read_excel(excelfile_name, header=2) data_frame = data_frame.dropna(how='all') data_frame['Date'] = data_frame['Date'].astype(str) data_frame.fillna(' ', inplace=True) data_frame = data_frame[data_frame.ix[:, 1] != ' '] data_frame = data_frame[data_frame.ix[:, 0] != 'en euro par litre'] data_frame.rename(columns={'Super carburant': 'super_plombe_ht'}, inplace=True) data_frame.rename(columns={'Super carburant.1': 'super_plombe_ttc'}, inplace=True)
# -*- coding: utf-8 -*- from __future__ import unicode_literals import logging import os import platform import sys import pandas as pd from ipp_macro_series_parser.config import Config app_name = os.path.splitext(os.path.basename(__file__))[0] log = logging.getLogger(app_name) parser = Config() prelevements_sociaux_source = parser.get( 'data', 'prelevements_sociaux_source').decode('utf-8') prelevements_sociaux_directory = parser.get('data', 'prelevements_sociaux_directory') assert prelevements_sociaux_source != 'None', \ "Set prelevements_sociaux_source in the data section of you config[_local].ini file to a valid directory" assert prelevements_sociaux_directory != 'None', \ "Set prelevements_sociaux_directory in the data section of you config[_local].ini file to a valid directory" def prelevements_sociaux_downloader(): sheetname1 = 'CSG-CRDS (V&M)' sheetname2 = 'Recettes CSG (CCSS)' sheetname3 = 'Calcul_assietteCSG'
# -*- coding: utf-8 -*-

import os
import pandas
import pkg_resources

from ipp_macro_series_parser.config import Config
from pandas.util.testing import assert_frame_equal
from ipp_macro_series_parser.comptes_nationaux.parser_main import get_comptes_nationaux_data
from ipp_macro_series_parser.data_extraction import (look_many, look_up,
    get_or_construct_value, get_or_construct_data)
from ipp_macro_series_parser.comptes_nationaux.sheets_lists import generate_CN1_variables, generate_CN2_variables

# Directories configured in config[_local].ini.
parser = Config()
cn_directory = parser.get('data', 'cn_directory')
cn_hdf = parser.get('data', 'cn_hdf_directory')
cn_csv = parser.get('data', 'cn_csv_directory')
tests_directory = parser.get('data', 'tests_directory')

# Fixture data shipped with the installed package.
tests_data = os.path.join(
    pkg_resources.get_distribution('ipp-macro-series-parser').location,
    'ipp_macro_series_parser/tests/data')


def get_tidy_data(year):
    """Return the tidy comptes nationaux table for the given release *year*."""
    return get_comptes_nationaux_data(year)


# INPUTS
import pkg_resources import numpy from ipp_macro_series_parser.config import Config from ipp_macro_series_parser.comptes_nationaux.parser_tee import tee_df_by_year_generator from ipp_macro_series_parser.comptes_nationaux.parser_non_tee import non_tee_df_by_filename_generator log = logging.getLogger(__name__) parser = Config( config_files_directory = os.path.join(pkg_resources.get_distribution('ipp-macro-series-parser').location) ) hdf_directory = parser.get('data', 'cn_hdf_directory') def cn_df_generator(year, list_years = None, drop_duplicates = True, subset = None): """ Generates the table with all the data from Comptabilite Nationale. Parameters ---------- year : int year of INSEE data realease list_years : list of integers list of years of interest. Optional. Example --------
@author: Antoine """ import os import pkg_resources from ipp_macro_series_parser.config import Config from ipp_macro_series_parser.comptes_nationaux.parser_main import get_comptes_nationaux_data from ipp_macro_series_parser.data_extraction import look_many parser = Config( config_files_directory = os.path.join(pkg_resources.get_distribution('ipp-macro-series-parser').location) ) excel_output_directory = parser.get('data', 'cn_csv_directory') def reshape_to_long_for_output(df): """ Unmelts the data, using the years as variables (columns). Parameters ---------- df : DataFrame DataFrame generated by get_comptes_nationaux_data(year) and/or look_many(df, my_selection) Example -------- >>> from ipp_macro_series_parser.comptes_nationaux.cn_parser_main import get_comptes_nationaux_data >>> from ipp_macro_series_parser.data_extraction import look_many
#! /usr/bin/env python # -*- coding: utf-8 -*- import logging import pandas as pd import os from slugify import slugify """Parse dépenses and bénéficiaires of prestataions sociales to produce the dataframe stored in a HDF5 file or csv files """ from ipp_macro_series_parser.config import Config log = logging.getLogger(__name__) parser = Config() prestations_sociales_directory = parser.get('data', 'prestations_sociales_directory') def build_data_frame(section): assert section in ['beneficiaires', 'depenses'] directory = os.path.join( prestations_sociales_directory, 'raw', 'caf_data_fr', 'les-{}-tous-regimes-de-prestations-familiales-et-sociales'.format( section), ) prefix = 'DepTR' if section == 'depenses' else 'BenTR' filenames = [
# -*- coding: utf-8 -*- import os from ipp_macro_series_parser.config import Config from ipp_macro_series_parser.comptes_nationaux.parser_main import get_comptes_nationaux_data from ipp_macro_series_parser.data_extraction import look_many parser = Config() excel_output_directory = parser.get('data', 'cn_csv_directory') def reshape_to_long_for_output(df): """ Unmelts the data, using the years as variables (columns). Parameters ---------- df : DataFrame DataFrame generated by get_comptes_nationaux_data(year) and/or look_many(df, my_selection) Example -------- >>> from ipp_macro_series_parser.comptes_nationaux.cn_parser_main import get_comptes_nationaux_data >>> from ipp_macro_series_parser.data_extraction import look_many >>> table2013 = get_comptes_nationaux_data(2013) >>> my_selection = [{'code': None, 'institution': 'S1', 'ressources': False, ... 'description': 'PIB'}, ... {'code': None, 'institution': 'S1', 'ressources': False, ... 'description': 'PIN'}] >>> df = look_many(table2013, my_selection)
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

import os
import pandas
import pkg_resources

from ipp_macro_series_parser.config import Config
from ipp_macro_series_parser.denombrements_fiscaux.agregats_ipp import build_irpp_tables

config_parser = Config(
    config_files_directory=os.path.join(pkg_resources.get_distribution("ipp-macro-series-parser").location)
    )
xls_directory = config_parser.get("data", "denombrements_fiscaux_xls")
# Workbook holding the calibration sheet checked by this module.
file_path = os.path.join(xls_directory, u"Agrégats IPP - Données fiscales.xls")
sheetname = "calculs calage"


def error_msg(irpp_table_name, variable, year, target, actual):
    """Return a human-readable message describing a calibration mismatch."""
    template = """
    In table {} on year {}, error on variable {}: should be {} instead of {}
    """
    return template.format(irpp_table_name, year, variable, target, actual)


fill_value = 0
import pkg_resources import sys import urllib from ipp_macro_series_parser.config import Config app_name = os.path.splitext(os.path.basename(__file__))[0] log = logging.getLogger(app_name) parser = Config( config_files_directory = os.path.join(pkg_resources.get_distribution('ipp-macro-series-parser').location) ) demographie_directory = parser.get('data', 'demographie_directory') assert demographie_directory != 'None', \ "Set demographie_directory_directory in the data section of you config[_local].ini file to a valid directory" # Download a the xls file from url and unzipp it in directory thedir def demographie_downloader(years = None, directory = demographie_directory): assert years is not None if type(years) is int: years = [years] if not os.path.exists(directory): os.makedirs(directory) base_url = 'http://www.insee.fr/fr/ppp/bases-de-donnees/donnees-detaillees/bilan-demo/fichiers-xls/' for year in years: