Ejemplo n.º 1
0
def output_for_sheets(entry_by_index_list, version_year, csv_file_name):
    """
    Output the final data needed to recreate a sheet of "Agrégats IPP - Comptabilité nationale" into a csv file.

    Parameters
    ----------
    entry_by_index_list : list
        Variables to extract, passed unchanged to look_many. Presumably a list of dictionaries with keys
        'code', 'institution', 'ressources', 'year', 'description' (to be confirmed against look_many).
    version_year : int
        Year of the version of Comptabilité Nationale data the user wishes to have (most often the latest version).
    csv_file_name : string
        Path to the output csv file. Extension should be .txt (and not .csv) in order to be read by Excel.

    Returns
    -------
    The object returned by reshape_to_long_for_output, i.e. the same data that is handed to df_long_to_csv
    for writing to csv_file_name.

    Example
    --------
    >>> CN1 = cn_output.output_for_sheets(
    ...    cn_sheets_lists.list_CN1, 2013,
    ...    os.path.join(cn_directory, u'Agrégats IPP - Comptabilité nationale.txt')
    ...    )

    Creates a csv file containing the values of all the variables needed to construct sheet CN1, for all
    years. Each column is a variable, i.e. a tuple containing the aggregate's code, the institution concerned,
    whether it is Ressources or Emplois, and the file from where the data was extracted.

    Note
    ------
    The first drop_duplicates() should be unnecessary: the presence of drop_duplicates in the parsers should
    eliminate the need for that.

    The second drop_duplicates(), i.e. a drop_duplicates on all variables except those referring to the source file,
    is essential to avoid the same data being repeated in different columns when it is sourced from different files
    (typically, TEE and a Comptes nationaux file).
    """
    table = get_comptes_nationaux_data(version_year)
    extract = look_many(table, entry_by_index_list)

    # Strictly identical rows first (see Note in the docstring).
    extract = extract.drop_duplicates()
    # Then rows that only differ by their source file (e.g. TEE and Compte). The labels are given as a list
    # rather than a tuple, because pandas may interpret a tuple as one single column label. The argument is
    # kept positional so the call does not depend on the keyword name used by the installed pandas version.
    extract = extract.drop_duplicates(
        [u'code', u'institution', u'ressources', u'value', u'year']
    )

    df = reshape_to_long_for_output(extract)

    df_long_to_csv(df, csv_file_name)
    return df
def output_for_sheets(entry_by_index_list, version_year, csv_file_name):
    """
    Write to a csv file the data needed to recreate a sheet of "Agrégats IPP - Comptabilité nationale".

    Parameters
    ----------
    entry_by_index_list : list
        Variables to extract; forwarded as-is to look_many. Presumably each item is a dictionary with keys
        'code', 'institution', 'ressources', 'year', 'description' (to be confirmed against look_many).
    version_year : int
        Year of the version of Comptabilité Nationale data the user wishes to have (most often the latest version).
    csv_file_name : string
        Path to the output csv file. Extension should be .txt (and not .csv) in order to be read by Excel.

    Returns
    -------
    The result of reshape_to_long_for_output applied to the de-duplicated extract; this is also what is passed
    to df_long_to_csv together with csv_file_name.

    Example
    --------
    >>> CN1 = cn_output.output_for_sheets(
    ...    cn_sheets_lists.list_CN1, 2013,
    ...    os.path.join(cn_directory, u'Agrégats IPP - Comptabilité nationale.txt')
    ...    )

    Creates a csv file containing the values of all the variables needed to construct sheet CN1, for all
    years. Each column is a variable, i.e. a tuple containing the aggregate's code, the institution concerned,
    whether it is Ressources or Emplois, and the file from where the data was extracted.

    Note
    ------
    The first drop_duplicates() should be unnecessary: the presence of drop_duplicates in the parsers should
    eliminate the need for that.

    The second drop_duplicates(), i.e. a drop_duplicates on all variables except those referring to the source file,
    is essential to avoid the same data being repeated in different columns when it is sourced from different files
    (typically, TEE and a Comptes nationaux file).
    """
    # Columns that identify one data point independently of the file it was read from.
    # A list is used on purpose: pandas may treat a tuple as a single column label.
    identifying_columns = [u'code', u'institution', u'ressources', u'value', u'year']

    table = get_comptes_nationaux_data(version_year)
    extract = look_many(table, entry_by_index_list)

    # Drop fully identical rows, then rows carrying the same information from distinct sources
    # (e.g. TEE and Compte). The column list is passed positionally so the call does not rely on
    # the keyword name of the installed pandas version.
    extract = extract.drop_duplicates()
    extract = extract.drop_duplicates(identifying_columns)

    df = reshape_to_long_for_output(extract)

    df_long_to_csv(df, csv_file_name)
    return df
Ejemplo n.º 3
0
def test_look_many():
    """
    Run look_many on the tidy data for 2013 with the entries built by create_list_dicts_for_look_many.

    Makes no assertion; simply returns whatever look_many returns.
    """
    tidy_data = get_tidy_data(2013)
    lookup_entries = create_list_dicts_for_look_many()
    return look_many(tidy_data, lookup_entries)
Ejemplo n.º 4
0
def test_look_many():
    """
    Call look_many with get_tidy_data(2013) and the output of create_list_dicts_for_look_many.

    The result of look_many is returned unchanged; nothing is asserted here.
    """
    data_2013 = get_tidy_data(2013)
    entries = create_list_dicts_for_look_many()
    result = look_many(data_2013, entries)
    return result