Ejemplo n.º 1
0
    "width": "35%",
    "height": "80px",
    "left": "17rem",
    "bottom": "10px",
    #"border": "1px solid #e7eff6",
    #"border-radius": "10px",
    'overflowY': 'scroll'
}

# ------------------------------
# 2. SQL Queries
# ------------------------------
# 2.1 Initial query
# ------------------------------
# Stored benchmarking result (rank and efficiency) for every municipality.
df_dropout_efficiency = def_data.runQuery(
    "\n    select code_municip, name_municip as muni, benchmarking_rank  as rank, \n"
    "    benchmarking_efficiency as efficiency \n"
    "    from cluster_master_table_by_municipio cmtbm  ; ")
# Cast the efficiency to float64 and derive its display form
# (one-decimal percentage) from the already-cast column.
df_dropout_efficiency = df_dropout_efficiency.assign(
    efficiency=lambda frame: frame['efficiency'].astype(np.float64),
    efficiency_percent=lambda frame: frame['efficiency'].map(
        "{:.1%}".format),
)
# Highest efficiency first; ties are ordered alphabetically by municipality.
df_dropout_efficiency = df_dropout_efficiency.sort_values(
    by=['efficiency', 'muni'], ascending=[False, True])
# 2.1 Query function
# ------------------------------

# ------------------------------
# 3. Map
# ------------------------------
# 3.1 Loads JSON file
Ejemplo n.º 2
0
def on_button_click(n):
    """Recompute the DEA benchmarking for the current sidebar selection.

    The input variables and the optional region are read from
    ``sidebar_benchmarking``; the data comes from
    ``cluster_master_table_by_municipio`` and the linear programs are set up
    by the ``def_data.BCCO_*`` helpers.

    Args:
        n: Click count of the triggering button; ``None`` means it has not
            been clicked yet.

    Returns:
        A four-item list ``[map figure, ranking rows, reference-set rows,
        slack bar figure]``. When ``n`` is ``None`` or no input variable is
        selected, the module-level defaults (``EF_Map``,
        ``ranking_table.data``, ``group_table.data``, ``slack_graph``) are
        returned instead.
    """
    # 0. Nothing to compute: no click yet, or no input variable selected
    # (an empty selection would otherwise produce the invalid SQL ``in ()``).
    selected_ids = None if n is None else sidebar_benchmarking.input_array
    if selected_ids is None or len(selected_ids) == 0:
        return [EF_Map, ranking_table.data, group_table.data, slack_graph]

    area = sidebar_benchmarking.area_array

    # 1. Define set of input variables for DEA
    # 1.1 Get variable ids from checklist (quoted and escaped for SQL)
    id_list = ','.join(_sql_literal(var_id) for var_id in selected_ids)

    # 1.2 Get variable name from SQL table var_definition.
    df_var_name = def_data.runQuery(
        'select name, label from public.var_definition where var_id in (' +
        id_list + ');')

    # 2. Define the input and output variables and the SQL query.
    inp = df_var_name['name'].tolist()
    out = ['nodropouts']
    benchmarking_sql_query = _build_benchmarking_query(inp, area)

    # 3. Get data from SQL table cluster_master_table_by_municipio.
    df_benchmarking_data = def_data.runQuery(benchmarking_sql_query)

    # 4. Performs DEA calculations
    df_benchmarking_data = df_benchmarking_data.rename(
        columns={'code_municip': 'DMU'})

    # 4.2 Load the models for Phase I and Phase II
    def_data.BCCO_Base_PH1(df_benchmarking_data, inp, out)
    def_data.BCCO_Base_PH2(df_benchmarking_data, inp, out)

    # 4.5 Solve both phases for every DMU. The rows are collected in a list
    # and turned into a frame once: DataFrame.append copied the whole frame
    # on every iteration and no longer exists in pandas >= 2.0.
    # The LP coefficients are read back from def_data attributes, presumably
    # filled in by the BCCO_DMU_PH1 / BCCO_DMU_PH2 calls just before.
    # NOTE(review): method="simplex" was deprecated in SciPy 1.9 and removed
    # in 1.11. It is kept because another solver may report a different
    # optimal vertex and therefore different slacks/reference sets.
    results = []
    for dmu in df_benchmarking_data['DMU'].tolist():
        def_data.BCCO_DMU_PH1(df_benchmarking_data, dmu, inp, out)
        ph1_dual = linprog(c=def_data.obj1,
                           A_ub=def_data.lhs_ineq1,
                           b_ub=def_data.rhs_ineq1,
                           A_eq=def_data.lhs_eq1,
                           b_eq=def_data.rhs_eq1,
                           bounds=def_data.bnd1,
                           method="simplex")
        def_data.BCCO_DMU_PH2(df_benchmarking_data, dmu, -1 * ph1_dual.fun,
                              inp, out)
        ph2 = linprog(c=def_data.obj2,
                      A_eq=def_data.lhs_eq2,
                      b_eq=def_data.rhs_eq2,
                      bounds=def_data.bnd2,
                      method="simplex")
        results.append({
            'dmu': dmu,
            'efficiency': -1 / ph1_dual.fun,
            'reference_set': def_data.BCCO_DMU_REFSET(
                df_benchmarking_data, inp, out, ph2.x),
            'slack': def_data.BCCO_DMU_VAR(inp, out, ph1_dual.slack),
        })
    ef = pd.DataFrame(
        results, columns=['dmu', 'efficiency', 'reference_set', 'slack'])

    # 5. Process the results
    # 5.1 Merge to get municipalities names
    ef = ef.merge(df_benchmarking_data[['DMU', 'muni']],
                  left_on='dmu',
                  right_on='DMU')
    ef = ef.sort_values(by=["efficiency", 'muni'], ascending=[False, True])
    ef["rank"] = ef["efficiency"].rank(ascending=False, method='min')
    ef['efficiency_percent'] = ef['efficiency'].astype(float).map(
        "{:.1%}".format)
    ef['Municipality'] = ef['muni']

    # 5.2 Efficient Units and Reference set
    # Map each efficient DMU to its municipality name once; setdefault keeps
    # the first name if a DMU code appears more than once.
    ef_dmu = ef[ef['efficiency'] >= 1][['dmu', 'muni']]
    efficient_names = {}
    for code, name in zip(ef_dmu['dmu'], ef_dmu['muni']):
        efficient_names.setdefault(code, name)

    def convert_dmu_to_string(array):
        """Return the names of the efficient DMUs in ``array``, dropping the rest."""
        return [efficient_names[code] for code in array
                if code in efficient_names]

    # How many inefficient municipalities share each reference set. The
    # counts are read straight from the value_counts() Series so the code
    # does not depend on the column names reset_index() would produce.
    refset_counts = ef.loc[ef['efficiency'] < 1,
                           'reference_set'].value_counts()

    # This is the new Reference Set to print on screen.
    refset_df = pd.DataFrame(
        {
            'Reference Set': [convert_dmu_to_string(rs)
                              for rs in refset_counts.index],
            '# Municipalities': refset_counts.tolist()
        },
        columns=['Reference Set', '# Municipalities'])
    new_group_data = refset_df.to_dict('records')

    # Refset for display in the map
    ef['Ref Municipality'] = ef['reference_set'].apply(convert_dmu_to_string)

    # 5.3 Slack Variables count: how often each variable shows up in a slack
    # list. rename_axis/reset_index(name=...) fixes the column names
    # explicitly ('index' = variable name, 'Slack Count' = occurrences).
    slack = [name for sl in ef['slack'] for name in sl]
    slack_data = (pd.Series(slack, dtype=object)
                  .value_counts()
                  .rename_axis('index')
                  .reset_index(name='Slack Count'))
    slack_data = slack_data.merge(df_var_name,
                                  left_on='index',
                                  right_on='name',
                                  how='left')
    slack_data = slack_data.rename(columns={'label': 'Feature'})

    new_slack_graph = px.bar(slack_data,
                             x="Feature",
                             y="Slack Count",
                             height=200)
    new_slack_graph.update_layout(margin={"r": 0, "t": 0, "l": 0, "b": 0})

    # 6. Creates new map
    # 6.1 Loads JSON file (JSON text is UTF-8, so do not rely on the
    # platform's default encoding)
    with open(_map_file_for(area), encoding='utf-8') as geo:
        munijson = json.load(geo)

    # 6.2 Define new map properties
    # ------------------------------
    new_Map = px.choropleth_mapbox(
        ef,  # Data
        locations='dmu',  # Column with the identifiers used in the GeoJSON
        featureidkey="properties.MPIO_CCNCT",  # GeoJSON property holding the municipality id
        color='efficiency',  # Column giving the color intensity of the region
        geojson=munijson,  # The GeoJSON file
        zoom=4,  # Zoom
        mapbox_style="white-bg",  # Other styles need a Mapbox account and a token
        center={
            "lat": 4.5709,
            "lon": -74.2973
        },  # Center
        color_continuous_scale="Viridis",  # Color Scheme
        opacity=0.5,  # Opacity of the map
        hover_name='Municipality',
        hover_data=['Ref Municipality'])
    new_Map.update_geos(fitbounds="locations", visible=False)
    new_Map.update_layout(margin={"r": 0, "t": 0, "l": 0, "b": 0})

    new_Rank_data = ef[['rank', 'muni',
                        'efficiency_percent']].to_dict('records')

    return [new_Map, new_Rank_data, new_group_data, new_slack_graph]


# The helpers are defined after on_button_click so that a decorator placed
# directly above it (if any) keeps applying to on_button_click.
def _sql_literal(value):
    """Return ``value`` as a single-quoted SQL string literal ('' escapes ')."""
    return "'" + str(value).replace("'", "''") + "'"


def _build_benchmarking_query(input_names, area):
    """Return the SELECT fetching the DEA inputs and output per municipality.

    ``input_names`` are column names (read from ``var_definition`` by the
    caller) and are inserted verbatim; ``area`` is an optional region name and
    is inserted as an escaped literal. All restrictions are joined under one
    WHERE, so the statement is valid with or without a region.
    """
    columns = ['code_municip', 'name_municip as muni']
    columns.extend(input_names)
    columns.append('(dane_alu_01 - dane_alu_11) as nodropouts')

    conditions = []
    if area is not None:
        conditions.append('region = ' + _sql_literal(area))
    conditions.extend(name + ' is not null' for name in input_names)
    conditions.append('dane_alu_01 > 0')

    return ('select ' + ', '.join(columns) +
            ' from cluster_master_table_by_municipio where ' +
            ' and '.join(conditions) + ';')


def _map_file_for(area):
    """Return the GeoJSON path for ``area``; the national file when there is no regional one."""
    regional_files = {
        'Amazonica': 'data/amazonica90.json',
        'Andina': 'data/andina90.json',
        'Caribe': 'data/caribe90.json',
        'Orinoquia': 'data/orinoquia90.json',
        'Pacifica': 'data/pacifico90.json',
    }
    return regional_files.get(area, 'data/municipios_1mn.json')
Ejemplo n.º 3
0
import dash
import dash_html_components as html
import dash_bootstrap_components as dbc
import dash_core_components as dcc
from dash.dependencies import Input, Output, State

# Recall app
from app import app
from library import def_data

# Municipality-level tables, loaded once at import time.
# df_vars is not queried here: it is loaded further down in this module
# (ordered by group_id) and nothing reads it before that point, so a query
# at this spot was a wasted database round-trip.
df_mun = def_data.runQuery("""select * from desertion_by_municip;""")
df_master = def_data.runQuery("""select * from master_table_by_municipio;""")

# dropdown = dbc.DropdownMenu(
#     id='drop_down',
#     label="Menu",
#     children=[
#         dbc.DropdownMenuItem(value=list(df['group_id'].unique())),
#         dbc.DropdownMenuItem("Item 2"),
#         dbc.DropdownMenuItem("Item 3"),
#     ],
# )

# dropdown = dcc.Dropdown(
#     id='drop_down',
#     options=[{"label": name, "value": name} for name in names],
#     value=[name for name in names],
#     clearable=True,
#     multi=True,
# ),
# ------------------------------
# CSS for the sidebar container.
SIDEBAR_STYLE = {
    # Placement: fixed against the left edge, from 140px below the top down
    # to the bottom.
    "position": "fixed",
    "top": "140px",
    "left": 0,
    "bottom": 0,
    # Box size and inner spacing.
    "width": "16rem",
    "padding": "2rem 1rem",
    # Appearance.
    "background-color": "#f8f9fa",
}

# ------------------------------
# 2. SQL queries
# ------------------------------
# 2.1 Query variables
# Variable catalogue, ordered by group_id so rows of one group are contiguous.
df_vars = def_data.runQuery(
    "select * from public.var_definition order by group_id;")

# ------------------------------
# 3. Accordion
# ------------------------------
var_groups = list(df_vars['group_id'].unique())
cardBody = []
for gr in var_groups:
    tempCardBody = []
    options = []
    for var_id in df_vars[df_vars['group_id'] == gr]['var_id']:
        options.append({
            'label':
            '  ' + list(df_vars[df_vars['var_id'] == str(var_id)]['label'])[0],
            'value':
            str(var_id)
Ejemplo n.º 5
0
# ------------------------------
# CSS for the sidebar container.
SIDEBAR_STYLE = {
    # Placement: fixed against the left edge, from 140px below the top down
    # to the bottom.
    "position": "fixed",
    "top": "140px",
    "left": 0,
    "bottom": 0,
    # Box size and inner spacing.
    "width": "16rem",
    "padding": "2rem 1rem",
    # Appearance.
    "background-color": "#f8f9fa",
}

# ------------------------------
# 2. SQL queries
# ------------------------------
# 2.1 Query variables
# Variable catalogue, ordered by group_id so rows of one group are contiguous.
df_vars = def_data.runQuery(
    "select * from public.var_definition order by group_id;")

# ------------------------------
# 3. Accordion
# ------------------------------
# Distinct variable groups, in the order they first appear in df_vars.
var_groups = list(df_vars['group_id'].unique())
# NOTE(review): cardBody and tempCardBody are initialised here but not filled
# by the code visible in this block - presumably later code in the loop
# consumes them; confirm before refactoring.
cardBody = []
for gr in var_groups:
    tempCardBody = []
    # Children built for this group: one label plus a line break per variable.
    options = []
    for var_id in df_vars[df_vars['group_id'] == gr]['var_id']:
        # The label text is the variable's 'label' value (first match on
        # var_id, compared as a string); the component id is
        # 'mapLabel-<var_id>'.
        options.append(
            html.Label(list(
                df_vars[df_vars['var_id'] == str(var_id)]['label'])[0],
                       id='mapLabel-' + str(var_id)))
        options.append(html.Br())
Ejemplo n.º 6
0
def build_chart(depto_val, *args):
    """Build the choropleth map for the current department/variable selection.

    Args:
        depto_val: Department code to zoom into (one map region per
            municipality), or ``None`` for the country-wide view with one
            region per department.
        *args: Not read by the body. Presumably the ``n_clicks`` inputs of
            the variable labels, present only so that a click triggers this
            callback (TODO confirm against the callback declaration).

    Returns:
        The figure produced by ``px.choropleth_mapbox``.
    """
    # 3.2.1 Determine whether a variable label was clicked, and which one
    # ------------------------------
    global selected_var_code

    df_vars = sidebar_maps.df_vars.copy()
    changed_label_id = [p['prop_id']
                        for p in dash.callback_context.triggered][0]

    # The clicked variable is remembered across calls in the module-level
    # global, so a later dropdown change keeps showing the same variable.
    for var_id in df_vars['var_id']:
        if 'mapLabel-' + var_id + ".n_clicks" in changed_label_id:
            selected_var_code = var_id

    selected_var = df_vars.loc[df_vars['var_id'] == str(
        selected_var_code)]['name'].reset_index()['name'][0]

    # 3.2.2 Map by department (drop down is None)
    if depto_val is None:
        # 3.2.2.1 Query desired variable
        # ------------------------------
        # NOTE(review): this view always plots desertion_perc, whatever label
        # was clicked - confirm that this is intended.
        selected_var = "desertion_perc"
        sql_query = 'select code_dept, name_dept, avg(' + selected_var + ') as ' + selected_var + ' ' + \
            'from cluster_master_table_by_municipio ' + \
            'group by code_dept, name_dept;'
        df_var_all_dpto = def_data.runQuery(sql_query)
        df_var_all_dpto[selected_var] = df_var_all_dpto[selected_var].astype(
            np.float64)

        var_info = df_vars[df_vars['name'] == selected_var].reset_index()
        label_fig = var_info['label'][0]
        label_tittle = var_info['description'][0]

        # 3.2.2.2 Define new map
        # ------------------------------
        new_map = px.choropleth_mapbox(
            df_var_all_dpto,
            geojson=DEP_json,
            color=selected_var,
            locations="code_dept",
            featureidkey="properties.DPTO_CCDGO",
            color_continuous_scale="Blues",
            center={
                "lat": 4.94,
                "lon": -73.77
            },
            hover_name="name_dept",
            mapbox_style="carto-positron",
            zoom=4)
        new_map.update_layout(margin={"r": 0, "l": 0, "b": 0},
                              title_text=label_tittle,
                              coloraxis_colorbar=dict(title=label_fig))
        return new_map

    # 3.2.3 Map by municipality (drop down is not None)
    # 3.2.3.1 Query desired variable
    # ------------------------------
    # These columns are always selected for the hover box. If the chosen
    # variable is one of them, desertion_perc is selected in its place so the
    # result does not contain the same column twice.
    hover_source_columns = {
        'dane_alu_18_p', 'dane_tic_01', 'dane_alu_12_p', 'dane_tic_03_1_p',
        'po_pob_rural_10mil'
    }
    if selected_var in hover_source_columns:
        selected_var_query = 'desertion_perc'
    else:
        selected_var_query = selected_var

    # The department code arrives from the UI: quote it as an SQL literal,
    # doubling any embedded single quote.
    depto_literal = "'" + str(depto_val).replace("'", "''") + "'"
    sql_query = 'select code_municip, code_dept, name_municip, dane_alu_18_p, dane_tic_03_1_p, ' + \
                'po_pob_rural_10mil, dane_alu_12_p, dane_tic_01, ' + selected_var_query + ' ' + \
                'from cluster_master_table_by_municipio ' + \
                'where code_dept = ' + depto_literal + ';'

    df_var_by_dpto = def_data.runQuery(sql_query)
    df_var_by_dpto[selected_var] = df_var_by_dpto[selected_var].astype(
        np.float64)

    # 3.2.3.2 Filtering the department
    # ------------------------------
    department_features = [
        city for city in MUN_json['features']
        if city['properties']['DPTO_CCDGO'] == depto_val
    ]
    MUN2_json['features'] = department_features

    # Center the map on the first vertex of the first municipality. Fall back
    # to the country-wide center when the department has no feature (or no
    # coordinates) instead of failing with an IndexError.
    first_vertex = None
    if department_features:
        first_vertex = _first_vertex(
            department_features[0]['geometry']['coordinates'])
    if first_vertex is None:
        new_center = dict(lat=4.94, lon=-73.77)
    else:
        new_center = dict(lat=first_vertex[1], lon=first_vertex[0])

    # 3.2.3.3 Map layout
    # ------------------------------
    var_info = df_vars[df_vars['name'] == selected_var].reset_index()
    label_fig = var_info['label'][0]
    label_tittle = var_info['description'][0]

    # Hover columns: shares are shown as percentages with two decimals ...
    percent_columns = {
        '% Transferred students': 'dane_alu_12_p',
        '% Students afternoon': 'dane_alu_18_p',
        '% Schools with electricity': 'dane_tic_03_1_p',
    }
    for shown_name, source in percent_columns.items():
        df_var_by_dpto[shown_name] = [
            "{0:.2f}%".format(val * 100) for val in df_var_by_dpto[source]
        ]
    # ... and the remaining figures are rounded to two decimals.
    rounded_columns = {
        '% Habitants (towns-rural)': 'po_pob_rural_10mil',
        'Avg computers x100 stu.': 'dane_tic_01',
    }
    for shown_name, source in rounded_columns.items():
        df_var_by_dpto[shown_name] = [
            round(val, 2) for val in df_var_by_dpto[source]
        ]

    # 3.2.3.4 Define new map
    # ------------------------------
    new_map = px.choropleth_mapbox(
        df_var_by_dpto,
        geojson=MUN2_json,
        locations='code_municip',
        color=selected_var,
        featureidkey="properties.MPIO_CCNCT",
        hover_name="name_municip",
        mapbox_style="carto-positron",
        center=new_center,
        hover_data=[
            '% Students afternoon', '% Habitants (towns-rural)',
            'Avg computers x100 stu.', '% Schools with electricity',
            '% Transferred students'
        ],
        zoom=6,
        color_continuous_scale="blues",
    )
    new_map.update_layout(margin={"r": 0, "l": 0, "b": 0},
                          title_text=label_tittle,
                          coloraxis_colorbar=dict(title=label_fig))
    return new_map


# Defined after build_chart so that a decorator placed directly above it
# (if any) keeps applying to build_chart.
def _first_vertex(coordinates):
    """Return the first ``[lon, lat, ...]`` position of a GeoJSON coordinate array.

    Descends through the nesting until it reaches a position, so it works for
    Polygon (rings of positions) and MultiPolygon (polygons of rings) alike.
    Returns ``None`` when no position is found.
    """
    node = coordinates
    while node and isinstance(node[0], (list, tuple)):
        node = node[0]
    if node and len(node) >= 2:
        return node
    return None
Ejemplo n.º 7
0
# CSS for the map container: fixed in place, 17rem from the left edge and
# 140px from the top, 70% wide, with a thin light border.
MAP_MAPS_STYLE = dict(
    position="fixed",
    width="70%",
    left="17rem",
    top="140px",
    border="1px solid #e7eff6",
)

# ------------------------------
# 2. SQL Queries
# ------------------------------
# 2.1 Initial query
# ------------------------------
# Average desertion percentage per department for the 2019 cohort,
# with the average cast to float64.
df_desertion = def_data.runQuery(
    "\n    select code_dept, name_dept, avg(desertion_perc) as desertion_perc\n"
    "    from master_table_by_municipio \n"
    "    where year_cohort = 2019\n"
    "    group by code_dept, name_dept; ")
df_desertion = df_desertion.astype({'desertion_perc': np.float64})

# Variable catalogue, ordered by group_id so rows of one group are contiguous.
df_vars = def_data.runQuery(
    "select * from public.var_definition order by group_id;")

# 2.2 Format adjustment
# ------------------------------

# ------------------------------
# 3. MAP
# ------------------------------
# 3.1 Global variables
# ------------------------------
Ejemplo n.º 8
0
def update_cluster_figures(feature_id, log_value, cluster_value, y_value):
    """Rebuild the cluster scatter plot, box plot and explanatory text.

    The scatter data lives in module-level globals and is only re-queried
    when ``feature_id`` differs from the feature that is already loaded.

    Args:
        feature_id: ``var_id`` of the feature to plot, or ``None`` to keep
            the currently loaded one.
        log_value: ``'log'`` puts the feature axis on a logarithmic scale;
            any other value keeps it linear.
        cluster_value: Value of 'Cluster Description' to keep, or ``None``
            to plot every row.
        y_value: Column used for the scatter plot's y-axis.

    Returns:
        ``[scatter figure, box figure, second text]``.
    """
    global df_scatter, cl_scatter_feature, cl_feature_label

    # 1. Reload the data frame only when a different feature was selected.
    if feature_id is not None:
        if feature_id != cl_scatter_feature:
            cl_scatter_feature = feature_id

            # Column name and display label of the selected feature.
            feature_rows = df_vars[df_vars['var_id'] == str(feature_id)]
            feature_column = feature_rows['var_name'].reset_index(
                drop=True)[0]
            cl_feature_label = feature_rows['Feature'].reset_index(
                drop=True)[0]

            df_scatter = def_data.runQuery(
                'select name_municip, desertion_no, desertion_perc, me_cobertura_neta, '
                'cobertura_rank, desercion_rank, deser_perc_rank, ' +
                feature_column + ' from cluster_master_table_by_municipio; ')

            numeric_columns = ('desertion_no', 'me_cobertura_neta',
                               'desertion_perc', feature_column)
            for column in numeric_columns:
                df_scatter[column] = df_scatter[column].astype(np.float64)

            # The cluster id is the first character of the rank description.
            df_scatter['Cluster'] = (
                df_scatter['deser_perc_rank'].astype(str).str[0])

            display_names = {
                'name_municip': 'Municipio',
                feature_column: cl_feature_label,
                'desertion_no': '# Dropouts',
                'desertion_perc': '% Dropouts',
                'me_cobertura_neta': 'Coverage',
                'deser_perc_rank': 'Cluster Description',
                "cobertura_rank": "Coverage Type",
                "desercion_rank": "Desertion Type"
            }
            df_scatter = df_scatter.rename(columns=display_names)
        else:
            print('Not necessary to update data frame')

    # 2. Keep only the selected cluster (all rows when none is selected).
    df_scatter_final = df_scatter
    if cluster_value is not None:
        in_cluster = df_scatter['Cluster Description'] == cluster_value
        df_scatter_final = df_scatter[in_cluster]

    # 3. Scale of the feature axis: logarithmic or linear.
    cl_scale = log_value == 'log'

    # 4. Scatter plot: feature vs. the chosen y column, colored by cluster.
    new_scatter = px.scatter(df_scatter_final,
                             x=cl_feature_label,
                             y=y_value,
                             log_x=cl_scale,
                             color="Cluster")

    # 5. Box plot of the feature by coverage type, split by desertion type.
    new_box = px.box(df_scatter_final,
                     x="Coverage Type",
                     y=cl_feature_label,
                     color="Desertion Type",
                     log_y=cl_scale,
                     points="all",
                     title="Box plot of " + cl_feature_label,
                     hover_data=["Municipio"])

    # 6. Second text, available only for the features listed in text_list.
    new_second_text = (text_content[feature_id]
                       if feature_id in text_list else '')

    return [new_scatter, new_box, new_second_text]
Ejemplo n.º 9
0
# 1.10 Second text
# CSS for a 20px-high, absolutely positioned box 2250px down the page,
# 42% wide and 7% in from the right edge.
STYLE_CLUSTER_END_SPACE = dict(
    position="absolute",
    width="42%",
    height="20px",
    right="7%",
    top="2250px",
)
# ------------------------------
# 2. SQL Queries
# ------------------------------
# 2.1 Query for cluster by municipality
# ------------------------------
# Cluster data per municipality.
df_clusters = def_data.runQuery(
    "\n    select code_municip, name_municip, desertion_no, me_cobertura_neta, desertion_perc, deser_perc_rank, \n"
    "    cobertura_rank, desercion_rank, dane_doc_31\n"
    "    from cluster_master_table_by_municipio; ")

# Cast the numeric measures to float64.
df_clusters = df_clusters.astype({
    column: np.float64
    for column in ('desertion_no', 'me_cobertura_neta', 'desertion_perc',
                   'dane_doc_31')
})

# Replace the database column names with the names shown in the figures.
df_clusters = df_clusters.rename(
    columns={
        "name_municip": "Municipio",
        "desertion_no": "# Dropouts",
        "me_cobertura_neta": "Coverage",
        "desertion_perc": "% Dropouts",
        "deser_perc_rank": "Cluster Description",
        "cobertura_rank": "Coverage Type",
        "desercion_rank": "Desertion Type"
    })