Beispiel #1
0
def test_lorenz_curve():
    """
    Tests the `lorenz_curve` function, which calculates the Lorenz curve.

    An income distribution where everyone has almost the same wealth should
    give a curve that is close to a straight line.

    An income distribution where one person holds almost all the wealth
    should give a curve that stays flat and then shoots straight up as the
    cumulative population share approaches one.
    """
    n = 3000
    # A locally seeded generator keeps the noise (and so the test outcome)
    # reproducible without touching NumPy's global random state.
    rng = np.random.default_rng(1234)

    # Almost equal distribution: cumulative income share should track the
    # cumulative population share.
    y = np.ones(n) + rng.normal(scale=0.0001, size=n)
    cum_people, cum_income = lorenz_curve(y)
    assert_allclose(cum_people, cum_income, rtol=1e-03)

    # Very uneven distribution: a single observation dominates the total,
    # so every cumulative share except the last one should be about zero.
    y = np.full(n, 0.001)
    y[4] = 100000
    _, income_cum = lorenz_curve(y)
    # The expected curve has n + 1 points and ends at 1.
    expected_income_cum = np.zeros(n + 1)
    expected_income_cum[-1] = 1.
    assert_allclose(expected_income_cum, income_cum, atol=1e-4)
def test_lorenz_curve():
    """
    Tests the `lorenz_curve` function, which calculates the Lorenz curve.

    Two limiting cases are checked:

    * When everyone has almost the same wealth, the curve should be close
      to a straight line.
    * When one person holds almost all the wealth, the curve should stay
      flat and then shoot straight up as it approaches one.
    """
    n = 3000
    # Draw the perturbation from a seeded generator so that repeated runs
    # of this test always see the same data.
    rng = np.random.default_rng(1234)

    # Almost equal distribution
    noise = rng.normal(scale=0.0001, size=n)
    y = np.ones(n) + noise
    cum_people, cum_income = lorenz_curve(y)
    assert_allclose(cum_people, cum_income, rtol=1e-03)

    # Very uneven distribution: one large value among many tiny ones
    y = np.full(n, 0.001)
    y[4] = 100000
    _, income_cum = lorenz_curve(y)
    # Compared against n + 1 points: zeros everywhere, 1 at the end.
    expected_income_cum = np.zeros(n + 1)
    expected_income_cum[-1] = 1.
    assert_allclose(expected_income_cum, income_cum, atol=1e-4)
Beispiel #3
0
def update_tabs(n_clicks, reference_lines, selected_sectors, shock, empresa,
                formalidad, contrato):
    """Refresh the histogram, Lorenz curve and summary texts of the dashboard.

    The action taken depends on which input triggered the callback
    (read from ``dash.callback_context``):

    * nothing triggered: the current figures are returned with the
      placeholder texts;
    * ``'apply-button'``: the shock is applied to a copy of ``df`` and the
      histogram, Lorenz curve, median text and poverty text are rebuilt;
    * ``'reference-lines'``: only the shapes/annotations of the histogram
      are updated (the texts returned are the placeholders);
    * anything else: ``PreventUpdate`` is raised.

    NOTE(review): ``fig_hist`` and ``fig_lorenz`` are not created here and
    are mutated in place; presumably they are module-level figures shared by
    every caller of this callback -- confirm that this is intended.

    Parameters
    ----------
    n_clicks
        Not used in the body; the trigger is identified through the
        callback context instead.
    reference_lines
        Forwarded to ``generate_reference_lines``.
    selected_sectors
        Values matched against the ``sector`` column; ``None`` is treated
        as an empty selection.
    shock
        Forwarded to ``update_income``.
    empresa
        Value compared for equality with the ``tipo_empresa`` column.
    formalidad
        Values matched against the ``informales`` column; ``None`` is
        treated as an empty selection.
    contrato
        Values matched against the ``cuenta_propia`` column; ``None`` is
        treated as an empty selection.

    Returns
    -------
    tuple
        ``(fig_hist, texto_mediana, fig_lorenz, texto_pobreza)``.

    Raises
    ------
    PreventUpdate
        When the triggering input is neither ``'apply-button'`` nor
        ``'reference-lines'``.
    """
    # descriptive statistics (placeholders used until a shock is applied)
    mediana_choque = 'No choque'
    texto_mediana = 'Mediana choque: {}'.format(mediana_choque)
    pobreza_choque = 'No choque'
    texto_pobreza = u'Índice de pobreza choque: {}'.format(pobreza_choque)

    # get context
    ctx = dash.callback_context
    if not ctx.triggered:
        return fig_hist, texto_mediana, fig_lorenz, texto_pobreza

    # 'prop_id' has the form '<component id>.<property>'; keep the id only.
    action_id = ctx.triggered[0]['prop_id'].split('.')[0]

    if action_id == 'apply-button':
        # ----------------------------
        # 1. Data
        print(datetime.datetime.now())
        print('Updating data...')
        # ----------------------------

        if formalidad is None:
            formalidad = []
        if contrato is None:
            contrato = []
        if selected_sectors is None:
            selected_sectors = []

        # start again from a fresh copy of the base data
        df_shock = df.copy()
        # definition of at-risk rows: selected sector AND matching company
        # type AND (matching informality OR matching self-employment value)
        df_shock.loc[(((df_shock['sector'].isin(selected_sectors)) &
                       (df_shock['tipo_empresa'] == empresa)) &
                      ((df_shock['informales'].isin(formalidad)) |
                       (df_shock['cuenta_propia'].isin(contrato)))),
                     'riesgo'] = 1

        # rows not flagged above are marked as not at risk
        df_shock.riesgo = df_shock.riesgo.fillna(value=0)
        df_shock = update_income(df_shock, shock)

        # ----------------------------
        # 2. Histogram and median
        print('Updating Histogram...')
        # ----------------------------

        # clean histogram
        fig_hist.data = []

        # compute kernel
        kernel_shock = gaussian_kde(df_shock.ING_pc_choque_arriendo,
                                    weights=df_shock.fac_exp_ind_12m)

        # update median
        mediana_choque = weighted_median(df_shock, 'ING_pc_choque_arriendo',
                                         'fac_exp_ind_12m')
        texto_mediana = 'Mediana choque: {:,.0f} COP'.format(mediana_choque)

        # modified distribution
        fig_hist.add_trace(
            go.Scatter(x=xs,
                       y=kernel_shock(xs),
                       name=u'Distribución Choque',
                       line_color=naranja,
                       fill='tozeroy',
                       fillcolor='rgba(237,177,131,0.5)',
                       mode='lines'))
        # Original distribution
        fig_hist.add_trace(
            go.Scatter(x=xs,
                       y=dist_original,
                       mode='lines',
                       name=u'Distribución Original',
                       line=dict(color=black)))

        # ----------------------------
        # 3. Lorenz
        print('Updating Lorenz...')
        # ----------------------------

        # clean figure
        fig_lorenz.data = []

        fig_lorenz.add_trace(
            go.Scatter(x=[0, 1],
                       y=[0, 1],
                       mode='lines',
                       name=u'Igualdad total',
                       line=dict(color='grey', dash="dashdot")))

        # calculate new values
        f_vals_shock, l_vals_shock = qe.lorenz_curve(
            df_shock.ING_pc_choque_arriendo.to_numpy())

        # sample to plot
        # The traces below are drawn with mode='lines', which joins points
        # in the order given, so the sampled indices must be sorted or the
        # curve is drawn as criss-crossing chords. The sample size is
        # clamped because random.sample raises ValueError when asked for
        # more items than the population holds.
        n_points = min(len(f_vals), len(f_vals_shock))
        sample_ids = sorted(
            random.sample(range(n_points), min(n_points, 10000)))

        fig_lorenz.add_trace(
            go.Scatter(x=f_vals_shock[sample_ids],
                       y=l_vals_shock[sample_ids],
                       mode='lines',
                       name=u'Distribución Choque',
                       line_color=naranja))

        fig_lorenz.add_trace(
            go.Scatter(x=f_vals[sample_ids],
                       y=l_vals[sample_ids],
                       mode='lines',
                       name=u'Distribución Original',
                       line_color=black))
        # ----------------------------
        # 4. Poverty
        print('Updating Poverty Measures... \n')
        print('-------------------------------------')
        # ----------------------------
        pobreza_choque = calculo_pobreza(df_shock, 'ING_pc_choque_arriendo')
        texto_pobreza = u'Índice de pobreza choque: {:.2f}%'.format(
            pobreza_choque)

    elif action_id == 'reference-lines':
        lines, names = generate_reference_lines(reference_lines, dist_original)
        fig_hist.update_layout(shapes=lines, annotations=names)

    else:
        raise PreventUpdate

    return fig_hist, texto_mediana, fig_lorenz, texto_pobreza
Beispiel #4
0
               mode='lines',
               name=u'Distribución Original',
               line=dict(color=black)))

# summary statistics
median_original = weighted_median(df, 'ING_pc_bl_def_arriendo',
                                  'fac_exp_ind_12m')

# Lorenz curve and GINI (https://python.quantecon.org/wealth_dynamics.html)
# TODO: weighted version of these calculations

# start = time.time()
# gini_original = qe.gini_coefficient(df.ING_pc_bl_def_arriendo.to_numpy())
# print('Gini calculated in {} minutes'.format((time.time()-start)/60))

# Lorenz curve values of the baseline income column.
f_vals, l_vals = qe.lorenz_curve(df.ING_pc_bl_def_arriendo.to_numpy())

# Empty figure holding only the layout for the Lorenz plot.
fig_lorenz = go.Figure(
    layout=go.Layout(title=go.layout.Title(text=u"Curva de Lorenz"),
                     xaxis=go.layout.XAxis(
                         title="Porcentaje acumulado de personas"),
                     yaxis=go.layout.YAxis(
                         title="Porcentaje acumulado del ingreso"),
                     plot_bgcolor='white',
                     paper_bgcolor='white',
                     font_color='grey',
                     showlegend=True))

# sample to plot
# The sampled indices are sorted so that a line trace built from them
# follows the curve from left to right, and the sample size is clamped
# because random.sample raises ValueError when asked for more items than
# the population holds.
ids = list(range(len(f_vals)))
sample_ids = sorted(random.sample(ids, min(len(ids), 10000)))