Example #1
0
def load_libelles_emploi_data(decennie=None,
                              debug=False,
                              force_recreate=False):
    """Return the counts of slugified job labels by year and versant.

    The counts are read from the HDF5 cache file of the decade when that file
    exists and ``force_recreate`` is false. Otherwise they are rebuilt from
    the careers data and written to the cache file.

    Args:
        decennie: decade used to name the cache file and passed to
            ``get_careers``; must not be None.
        debug: passed to ``get_careers``.
        force_recreate: when true, rebuild the counts even if the cache file
            exists.

    Returns:
        The counts restricted with ``.loc[2006:2014, ]``.

    Raises:
        AssertionError: if ``decennie`` is None.
    """
    assert decennie is not None
    cache_name = 'libemploi_{}.h5'.format(decennie)
    libemploi_h5 = os.path.join(libelles_emploi_directory, cache_name)
    use_cache = os.path.exists(libemploi_h5) and not force_recreate

    if not use_cache:
        labels = get_careers(variable='libemploi', decennie=decennie, debug=debug)
        statut = get_careers(variable='statut', decennie=decennie, debug=debug)
        # Keep only the rows whose statut is 'T' or 'H'.
        kept_statut = statut.query("statut in ['T', 'H']")
        labels = labels.merge(kept_statut, how='inner')
        labels['libemploi_slugified'] = labels.libemploi.apply(slugify, separator="_")
        labels.rename(columns={'statut': 'versant'}, inplace=True)
        by_year_and_versant = labels.groupby([u'annee', u'versant'])
        libemplois = by_year_and_versant['libemploi_slugified'].value_counts()
        log.info("Generating and saving libellés emploi to {}".format(libemploi_h5))
        libemplois.to_hdf(libemploi_h5, 'libemploi')
    else:
        libemplois = pd.read_hdf(libemploi_h5, 'libemploi')
        log.info("Libellés emploi read from {}".format(libemploi_h5))

    return libemplois.loc[2006:2014, ]
Example #2
0
def load_libelles(data_path=None, debug=False):
    """Load the job labels of the non-adjusted cases.

    The ``libemploi``, ``lib_cir``, ``lib_netneh`` and ``f_coll`` careers
    variables are loaded and inner-merged, keeping only the rows whose
    ``f_coll`` value (copied to a ``statut`` column) is 'T' or 'H'.

    Args:
        data_path: passed to ``get_careers``.
        debug: passed to ``get_careers``.

    Returns:
        The rows with a non-empty ``lib_cir`` and an empty ``lib_netneh``,
        reduced to the columns ``ident``, ``libemploi``, ``annee`` and
        ``libemploi_slugified``.
    """
    def fetch(variable):
        return get_careers(variable=variable, data_path=data_path, debug=debug)

    libemploi = fetch('libemploi')
    lib_cir = fetch('lib_cir')
    lib_netneh = fetch('lib_netneh')
    libemploi['libemploi_slugified'] = libemploi.libemploi.apply(slugify, separator="_")
    statut = fetch('f_coll')
    statut['statut'] = statut['f_coll']

    keys = ["ident", "annee"]
    merged = libemploi.merge(lib_cir, how='inner', on=keys)
    merged = merged.merge(lib_netneh, how='inner', on=keys)
    merged = merged.merge(statut.query("statut in ['T', 'H']"), how='inner')

    # Filtering condition for the labels, cf. Isabelle Bridenne:
    # "one must select if lib_cir_AAAA ^= '' and lib_netneh = ''; these are
    # the non-adjusted cases."
    has_lib_cir = merged.lib_cir != ''
    no_lib_netneh = merged.lib_netneh == ''
    kept_columns = ["ident", "libemploi", "annee", "libemploi_slugified"]
    return merged.loc[has_lib_cir & no_lib_netneh].loc[:, kept_columns]
Example #3
0
def build_destinations_dataframes(decennie = None):
    """Build the destinations dataframes of the raw and purged careers.

    Both dataframes are computed by ``get_destinations_dataframe`` on the
    ``c_netneh`` careers with ``annee > 2010``; the second one is computed
    after the careers went through ``clean_empty_netneh``.

    Args:
        decennie: passed to ``get_careers``.

    Returns:
        A ``(destinations, purged_destinations)`` tuple.
    """
    sort_keys = ['ident', 'annee']
    recent_years = 'annee > 2010'

    carrieres = get_careers(variable = 'c_netneh', decennie = decennie)
    carrieres = carrieres.sort_values(sort_keys).query(recent_years)
    destinations = get_destinations_dataframe(carrieres)

    # The careers are loaded a second time so the purge starts from fresh data.
    carrieres_to_purge = get_careers(variable = 'c_netneh', decennie = decennie)
    carrieres_to_purge = carrieres_to_purge.query(recent_years).sort_values(sort_keys)
    purged_carrieres = clean_empty_netneh(carrieres_to_purge)
    purged_destinations = get_destinations_dataframe(purged_carrieres)

    return destinations, purged_destinations
Example #4
0
def load_libelles(decennie = 1980, debug = False, year = 2014):
    """Load the job labels merged with the statut, from a given year on.

    Args:
        decennie: passed to ``get_careers``.
        debug: passed to ``get_careers``.
        year: first value of ``annee`` to keep.

    Returns:
        The inner merge of the ``libemploi`` careers (with an added
        ``libemploi_slugified`` column) and the ``statut`` careers, restricted
        to the rows with ``annee >= year``.
    """
    career_options = dict(decennie = decennie, debug = debug)
    libemploi = get_careers(variable = 'libemploi', **career_options)
    libemploi['libemploi_slugified'] = libemploi.libemploi.apply(slugify, separator = "_")
    statut = get_careers(variable = 'statut', **career_options)
    merged = libemploi.merge(statut, how = 'inner')
    from_year_on = merged.annee >= year
    return merged[from_year_on]
Example #5
0
def load_libelles(decennie=None, debug=False):
    """Load the non-empty job labels of the rows whose statut is 'T' or 'H'.

    Args:
        decennie: passed to ``get_careers``.
        debug: passed to ``get_careers``.

    Returns:
        The inner merge of the ``libemploi`` careers (with an added
        ``libemploi_slugified`` column) and the ``statut`` careers filtered on
        'T' and 'H', without the rows whose ``libemploi`` is an empty string.
    """
    career_options = dict(decennie=decennie, debug=debug)
    libemploi = get_careers(variable='libemploi', **career_options)
    libemploi['libemploi_slugified'] = libemploi.libemploi.apply(slugify, separator="_")
    statut = get_careers(variable='statut', **career_options)
    kept_statut = statut.query("statut in ['T', 'H']")
    merged = libemploi.merge(kept_statut, how='inner')
    has_label = merged.libemploi != ''
    return merged[has_label]
Example #6
0
def build_transitions_pct_by_grade_initial(decennie = 1970):
    """Compute the distribution of the number of transitions conditional on the first grade.

    The computation is run on four samples of the ``c_netneh`` careers (all
    rows, rows with ``annee != 2010``, and both again after
    ``clean_empty_netneh``) and the four results are concatenated under the
    sample names.

    Args:
        decennie: passed to ``get_careers``.

    Returns:
        The concatenation of the per-sample dataframes, each holding a
        ``population`` and a ``cdf`` column.
    """
    carrieres = get_careers(variable = 'c_netneh', decennie = decennie).sort_values(['ident', 'annee'])

    def get_transitions_grade_init(carrieres):
        previous_grade = carrieres.c_netneh.shift()
        # Keep the rows whose previous row has a grade and the same ident.
        selection = previous_grade.notnull() & (carrieres.ident == carrieres.shift().ident)

        changes = pd.DataFrame()
        changes['ident'] = carrieres.ident[selection]
        changes['transition'] = (carrieres.c_netneh != previous_grade)[selection]

        # Grade held in the earliest year present in the sample.
        premiere_annee = carrieres.annee.min()  # analysis:ignore
        premier_grade = carrieres.query('annee == @premiere_annee')[['ident', 'c_netneh']]

        with_first_grade = changes.merge(premier_grade, on = 'ident', how = 'left')
        # Number of grade changes of each ident, next to its first grade.
        transitions = (with_first_grade
            .groupby(['ident', 'c_netneh'])['transition']
            .sum()
            .astype(int)
            .reset_index()
            )

        counts = transitions.groupby('c_netneh')['transition'].value_counts()
        result = pd.DataFrame()
        result['population'] = counts.sort_values(ascending = False).cumsum()
        result['cdf'] = result.population / result.population.max()
        return result

    samples = {}
    samples['annees_2010_2014'] = get_transitions_grade_init(carrieres)
    samples['annees_2011_2014'] = get_transitions_grade_init(carrieres[carrieres.annee != 2010])
    samples['annees_2010_2014_purgees'] = get_transitions_grade_init(clean_empty_netneh(carrieres))
    samples['annees_2011_2014_purgees'] = get_transitions_grade_init(
        clean_empty_netneh(carrieres.query('annee != 2010'))
        )
    return pd.concat(samples)
Example #7
0
def build_destinations_by_grade(decennie = None):
    """Compute the distribution of the number of destination grades.

    ``get_destinations_by_grade`` is applied to four samples of the
    ``c_netneh`` careers (all rows, rows with ``annee != 2010``, and both
    again after ``clean_empty_netneh``).

    Args:
        decennie: passed to ``get_careers``.

    Returns:
        The concatenation of the four results, keyed by sample name.
    """
    carrieres = get_careers(variable = 'c_netneh', decennie = decennie).sort_values(['ident', 'annee'])

    samples = {}
    samples['annees_2010_2014'] = get_destinations_by_grade(carrieres)
    samples['annees_2011_2014'] = get_destinations_by_grade(carrieres[carrieres.annee != 2010])
    samples['annees_2010_2014_purgees'] = get_destinations_by_grade(clean_empty_netneh(carrieres))
    samples['annees_2011_2014_purgees'] = get_destinations_by_grade(
        clean_empty_netneh(carrieres.query('annee != 2010'))
        )
    return pd.concat(samples)
Example #8
0
def merge_libelles_emploi_data(debug=True, decennie=None):
    """Build and save the counts of slugified job labels, decade by decade.

    For each decade, the ``libemploi`` careers are inner-merged with the
    ``statut`` careers whose statut is 'T' or 'H', the labels are slugified,
    and the slugified labels are counted by ``annee`` and ``versant``. The
    counts of each decade are written to their own HDF5 file,
    ``libemploi_<decennie>.h5``, in ``libelles_emploi_tmp_directory``.

    Args:
        debug: passed to ``get_careers``.
        decennie: decade to process; when None, the decades 1950 and 1970 are
            processed.

    Returns:
        The counts of the last decade processed.
    """
    # The original signature put ``decennie`` after a defaulted parameter,
    # which is a syntax error; it now defaults to None.
    list_decennie = [1950, 1970] if decennie is None else [decennie]
    libemplois = None
    for current_decennie in list_decennie:
        # One file per decade: the '{}' placeholder was never filled in, so
        # every decade used to be written to the same file.
        libemploi_h5 = os.path.join(
            libelles_emploi_tmp_directory,
            'libemploi_{}.h5'.format(current_decennie))
        libemploi = get_careers(variable='libemploi',
                                decennie=current_decennie,
                                debug=debug)
        statut = get_careers(variable='statut',
                             decennie=current_decennie,
                             debug=debug)
        libemploi = libemploi.merge(
            statut.query("statut in ['T', 'H']"),
            how='inner',
        )
        libemploi['libemploi_slugified'] = libemploi.libemploi.apply(
            slugify, separator="_")
        libemploi.rename(columns=dict(statut='versant'), inplace=True)
        libemplois = libemploi.groupby(
            [u'annee', u'versant'])['libemploi_slugified'].value_counts()
        log.info(
            "Generating and saving libellés emploi to {}".format(libemploi_h5))
        libemplois.to_hdf(libemploi_h5, 'libemploi')
    return libemplois
Example #9
0
def build_transitions_pct_by_echantillon(decennie = 1970):
    """Compute the distribution of the number of transitions for each sample.

    The samples are the ``c_netneh`` careers, the careers with
    ``annee != 2010``, and both again after ``clean_empty_netneh``.

    Args:
        decennie: passed to ``get_careers``.

    Returns:
        A dataframe sorted by ``echantillon`` and ``transitions``, built by
        resetting the index of the concatenated per-sample distributions.
    """
    carrieres = get_careers(variable = 'c_netneh', decennie = decennie).sort_values(['ident', 'annee'])

    def get_transitions(carrieres, normalize = True):
        previous_grade = carrieres.c_netneh.shift()
        # Keep the rows whose previous row has a grade and the same ident.
        selection = previous_grade.notnull() & (carrieres.ident == carrieres.shift().ident)
        transitions = pd.DataFrame()
        transitions['ident'] = carrieres.ident[selection]
        transitions['transition'] = (carrieres.c_netneh != previous_grade)[selection]
        # Number of grade changes of each ident, then its distribution.
        per_ident = transitions.groupby('ident').sum().astype(int).squeeze()
        return per_ident.value_counts(normalize = normalize)

    samples = {}
    samples['annees_2010_2014'] = get_transitions(carrieres)
    samples['annees_2011_2014'] = get_transitions(carrieres[carrieres.annee != 2010])
    samples['annees_2010_2014_purgees'] = get_transitions(clean_empty_netneh(carrieres))  # 110 transitions
    samples['annees_2011_2014_purgees'] = get_transitions(
        clean_empty_netneh(carrieres.query('annee != 2010'))
        )  # 390701

    column_names = {
        'level_0': 'echantillon',
        'level_1': 'transitions',
        'transition': 'population',
        }
    distribution = pd.concat(samples).reset_index().rename(columns = column_names)
    return distribution.sort_values(['echantillon', 'transitions'])
Example #10
0
from sas7bdat import SAS7BDAT


from fonction_publique.base import raw_directory_path, clean_directory_path, get_careers, parser
from fonction_publique.merge_careers_and_legislation import get_grilles


libelles_emploi_directory = parser.get('correspondances', 'libelles_emploi_directory')
table_cdc_path = "M:/CNRACL/correspondance/LS/2017_lib2014/2017_lib2014.xlsx"


# Original job labels: gather the 2014 rows of every decade, then count the
# occurrences of each label.
decennies = [1950, 1960, 1970, 1980, 1990]
libemploi_2014_by_decennie = []
for decennie in decennies:
    print("Processing decennie {}".format(decennie))
    libemploi = get_careers(variable = 'libemploi', decennie = decennie, debug = False)
    libemploi_2014_by_decennie.append(libemploi[libemploi.annee == 2014])
# Concatenate once: DataFrame.append copied the accumulated rows at every
# iteration and is no longer available in recent pandas.
libemploi_all = pd.concat(libemploi_2014_by_decennie)
libemplois2014 = libemploi_all.libemploi.value_counts()
libemplois2014 = pd.DataFrame({'libemploi': libemplois2014.index, 'nb_obs': libemplois2014.values})

# CDC table, most frequent labels first.
# NOTE(review): the former ``encoding = 'utf-8'`` argument was dropped because
# current pandas ``read_excel`` does not accept it — confirm the labels still
# load correctly.
table_cdc = pd.read_excel(table_cdc_path)
# sort_values replaces DataFrame.sort, which no longer exists in pandas.
table_cdc = table_cdc.sort_values('nb_obs', ascending = False)

# Merge of the two label tables
data1 = table_cdc.rename(columns = {'nb_obs': 'nb_obs_CDC'})
# Missing CDC labels become empty strings; fillna returns a new frame instead
# of relying on a chained assignment.
data1 = data1[['lib_cir_2014', 'nb_obs_CDC']].fillna({'lib_cir_2014': ""})
Example #11
0
assert valid_data_frame, 'The correspondace data frame is not valid'

# 1. Adding the correspondence columns: left merge of the labels with the
# correspondence table on (statut/versant, annee, slugified label).
libemploi_grade = libemploi.merge(
    correspondance_data_frame,
    how = 'left',
    left_on = ['statut', 'annee', 'libemploi_slugified'],
    right_on = ['versant', 'annee', 'libelle'],
    ).drop('libelle', axis = 1)

# Number of distinct grades (only useful in an interactive session).
len(set(libemploi_grade.grade))

# 2. Adding code netneh
netneh = get_careers(variable = 'c_netneh', decennie = 1980, debug = debug)
netneh = netneh.loc[netneh.annee == 2014]
# Both tables must have the same number of rows before merging them.
assert len(netneh.ident) == len(libemploi_grade.ident)

final_merge = libemploi_grade.merge(netneh, how = 'inner', on = ['ident', 'annee'])

check_list = [
    'ADJOINT ADMINIST HOSP 2EME CL (E03)',
    'INFIRMIER DE CLASSE SUPERIEURE(*)',
    ]
Example #12
0
from __future__ import division

from fonction_publique.base import get_careers
# Exploration script: counts of job labels by statut for the year 2000.
# ``print`` is called with a single parenthesised argument so the script
# prints the same under Python 2 and is also valid Python 3.
decennie = 1950
debug = False
libemploi = get_careers(variable='libemploi',
                        decennie=decennie,
                        debug=debug,
                        where="annee >= 2000")
print(libemploi.annee.value_counts())
statut = get_careers(variable='statut',
                     decennie=decennie,
                     debug=debug,
                     where="annee >= 2000")
print(statut.annee.value_counts())
print(statut.statut.value_counts())
# Keep only the labels of the rows whose statut is 'T' or 'H'.
libemploi = libemploi.merge(
    statut.query("statut in ['T', 'H']"),
    how='inner',
)
libemploi_annee = libemploi.query('annee == 2000')
for statut_i in libemploi_annee.statut.unique():
    print(statut_i)
    print(libemploi_annee.query('statut == @statut_i').libemploi.value_counts())