def plot_model():
    """Build a parallel-categories figure from the predicted crime data.

    Calls ``predict_crime()``, stores the returned frame in the module-level
    ``model_data`` and adds an integer ``TYPE_NO`` column to that same frame;
    the column is used to colour the ribbons.

    Returns:
        go.Figure: a figure holding one ``go.Parcats`` trace whose dimensions
        are the NEIGHBOURHOOD, DAY_TYPE and TYPE columns.

    Raises:
        ValueError: from ``astype('int32')`` when ``TYPE`` contains a value
            that is not one of the four recoded crime types.
    """
    global model_data
    # Getting the predicted data frame from the predict_crime method.
    model_data = predict_crime()
    # Deliberately the same object as model_data: the original code also
    # added TYPE_NO to the shared frame, so that side effect is kept.
    df_final_plot = model_data

    # Creating numeric labels for the diagram colours.
    # A single dict-based replace assigned back to the column is used instead
    # of chained ``df[col].replace(..., inplace=True)`` calls, which operate
    # on a temporary under pandas copy-on-write and leave the column as-is.
    type_codes = {
        'MISCHIEF': '0',
        'BREAK And ENTER': '1',
        'THEFT': '2',
        'VEHICLE COLLISION': '3',
    }
    df_final_plot['TYPE_NO'] = (
        df_final_plot['TYPE'].replace(type_codes).astype('int32')
    )

    NEIGHBOURHOOD = go.parcats.Dimension(
        values=df_final_plot.NEIGHBOURHOOD, label="NEIGHBOURHOOD")
    DAY_TYPE = go.parcats.Dimension(
        values=df_final_plot.DAY_TYPE, label="DAY_TYPE")
    TYPE = go.parcats.Dimension(values=df_final_plot.TYPE, label="CRIME TYPE")
    color = df_final_plot.TYPE_NO

    # Creating the parallel-categories diagram with plotly go.
    fig = go.Figure(data=[go.Parcats(
        dimensions=[NEIGHBOURHOOD, DAY_TYPE, TYPE],
        line={'color': color, 'colorscale': 'rdbu'},
        labelfont={'size': 12, 'family': 'Times'},
        tickfont={'size': 10, 'family': 'Times'},
        arrangement='freeform')])

    return fig
def PCVisualize(filenum, columns, color):
    """Render a parallel-categories plot for the selected dataset columns.

    Args:
        filenum: dataset selector; only the string ``'1'`` is handled.
        columns: comma-separated keys, each looked up in ``data_cols`` to get
            a column of ``df_dataset``.
        color: key looked up in ``data_cols`` for the column that colours the
            lines.

    Returns:
        The rendered "paraplot.html" template with the plot JSON, or the
        rendered "500.html" template carrying the error text when anything
        fails while building the plot.
    """
    global data_cols, df_dataset
    filepath = session.get('filepath', None)
    set_vars(filepath)
    set_mappings()

    try:
        # Previously any other filenum fell through with unbound locals and
        # surfaced as a NameError message; report the real cause instead.
        if filenum != '1':
            raise ValueError("unsupported filenum: " + repr(filenum))

        datadf = df_dataset
        # The lookups live inside the try so an unknown column key is shown
        # on the 500 page like every other failure of this view.
        columndata = [data_cols[c] for c in columns.split(',')]
        resultdf = datadf[columndata]
        color_values = datadf[data_cols[color]]

        dict_list = [
            dict(label=str(name), values=resultdf[name])
            for name in columndata
        ]

        data = [go.Parcats(
            line=dict(color=color_values,
                      colorscale='viridis',
                      showscale=True,
                      cmid=color_values.median()),
            dimensions=dict_list)]

        para = json.dumps(data, cls=plotly.utils.PlotlyJSONEncoder)
    except Exception as e:
        return render_template("500.html", error=str(e))
    return render_template("paraplot.html", plot=para)
def update_figure():
    """Build a parallel-categories figure from names defined outside this block.

    Reads ``Time_dim``, ``Terminal_dim``, ``Passengers_dim``, ``color`` and
    ``colorscale`` from the enclosing scope.

    Returns:
        A set literal containing the single ``go.Figure``.
    """
    dimensions = [Time_dim, Terminal_dim, Passengers_dim]
    ribbon_style = dict(color=color, colorscale=colorscale)
    label_font = dict(size=18, family='Times')
    tick_font = dict(size=16, family='Times')

    parcats_trace = go.Parcats(
        dimensions=dimensions,
        line=ribbon_style,
        hoveron='color',
        hoverinfo='count+probability',
        labelfont=label_font,
        tickfont=tick_font,
        arrangement='freeform',
    )
    fig = go.Figure(data=[parcats_trace])
    # NOTE(review): ``{fig}`` is a set literal, not a dict; it requires the
    # figure to be hashable. Possibly ``fig`` or a keyed dict was intended -
    # confirm against the caller before changing the return shape.
    return {fig}
Exemple #4
0
    def feature_interactions(radio, url, feat_importance, rows):
        """Build the feature-interaction plot for a dataset page.

        Args:
            radio: view to draw - "top", "numeric" or "nominal".
            url: page URL; the dataset id is read from its ``data/<id>`` part.
            feat_importance: the cached frames are only read, and a plot only
                built, when this equals "done".
            rows: metadata records providing the "Attribute", "DataType" and
                "Target" fields.

        Returns:
            ``[]`` when ``feat_importance`` is not "done"; a pair of
            "No target found" strings when no row is flagged as target;
            otherwise an ``html.Div`` wrapping a graph or a short message.
        """
        data_id = int(re.search(r"data/(\d+)", url).group(1))
        if feat_importance != "done":
            return []
        df = pd.read_pickle("cache/df" + str(data_id) + ".pkl")
        fi = pd.read_pickle("cache/fi" + str(data_id) + ".pkl")

        # Get meta data
        meta_data = pd.DataFrame(rows)
        try:
            target_rows = meta_data[meta_data["Target"] == "true"]
            target_attribute = target_rows["Attribute"].values[0]
            target_type = target_rows["DataType"].values[0]
        except IndexError:
            return "No target found", "No target found"

        if target_type == "nominal" or target_type == "string":
            y = pd.Categorical(df[target_attribute]).codes
        else:
            y = df[target_attribute]
        # Feature interaction plots
        df = clean_dataset(df)

        # Extract top nominal, top numeric features
        numerical_features = list(
            meta_data["Attribute"][meta_data["DataType"] == "numeric"]
        )
        nominal_features = list(
            meta_data["Attribute"][meta_data["DataType"] == "nominal"]
        )
        top_numericals = fi["index"][fi["index"].isin(numerical_features)][:4]
        top_nominals = fi["index"][fi["index"].isin(nominal_features)][:4]
        df["target"] = df[target_attribute]

        # Bin numeric target
        if target_type == "numeric":
            df["target_var"] = y
            df = bin_numeric(df, "target_var", "target")
            # The second drop used to discard its result, leaving the helper
            # column in place; drop both helper columns for real.
            df = df.drop(columns=["bin", "target_var"])
        else:
            try:
                df["target"] = df["target"].astype(int)
            except ValueError:
                logger.warning("target not converted to int")
            df.sort_values(by="target", inplace=True)
            df["target"] = df["target"].astype(str)

        # Radio - Display top features
        if radio == "top":
            # Explicit copy so adding "target" never writes through to df
            # and does not trigger SettingWithCopyWarning.
            top_features = df[fi["index"][0:4].values].copy()
            top_features["target"] = df["target"]

            if len(top_numericals):
                px_mat = px.scatter_matrix(
                    top_features, color="target", height=800
                )
                px_mat.update_traces(diagonal_visible=False)
                graph = dcc.Graph(figure=px_mat)
            else:
                # NOTE(review): y was computed before clean_dataset() and the
                # sort above, so it may not line up row-for-row with
                # top_features - confirm.
                parcats = [
                    go.Parcats(
                        dimensions=[
                            {
                                "label": column,
                                "values": list(top_features[column].values),
                            }
                            for column in top_features.columns
                        ],
                        line={"color": y, "colorscale": "Portland"},
                        hoveron="color",
                        hoverinfo="count+probability",
                        arrangement="freeform",
                    )
                ]
                layout = go.Layout(autosize=False, height=800)
                fig = go.Figure(data=parcats, layout=layout)
                graph = dcc.Graph(figure=fig)
        elif radio == "numeric":  # Top numeric features
            if len(top_numericals):
                df_num = df[top_numericals].copy()
                df_num["target"] = df["target"]
                px_mat = px.scatter_matrix(df_num, color="target", height=800)
                px_mat.update_traces(diagonal_visible=False)
                graph = dcc.Graph(figure=px_mat)
            else:
                graph = html.P("No numericals found")
        elif radio == "nominal":
            if len(top_nominals):
                df_nom = df[top_nominals].copy()
                df_nom["target"] = df["target"]

                parcats = [
                    go.Parcats(
                        dimensions=[
                            {
                                "label": column,
                                "values": list(df_nom[column].values),
                            }
                            for column in df_nom.columns
                        ],
                        line={
                            "color": pd.Categorical(df_nom["target"]).codes,
                            "colorscale": "Portland",
                        },
                        hoveron="color",
                        hoverinfo="count+probability",
                        arrangement="freeform",
                    )
                ]
                layout = go.Layout(autosize=False, height=800)
                fig = go.Figure(data=parcats, layout=layout)
                graph = dcc.Graph(figure=fig)
            else:
                graph = html.P("No nominals found")
        else:
            # Previously an unrecognised option left ``graph`` unbound and the
            # return below raised UnboundLocalError.
            graph = html.P("Unknown feature selection")

        return html.Div(graph, className="twelve columns")
    def feature_interactions(rows, radio, url, dummy):
        """Build the feature-interaction plot for a dataset page.

        Args:
            rows: metadata records providing the "Attribute", "DataType" and
                "Target" fields.
            radio: view to draw - "top", "numeric" or "nominal".
            url: page URL; the dataset id is read from its ``data/<id>`` part.
            dummy: the cached frames are only read, and a plot only built,
                when this equals "done".

        Returns:
            ``[]`` when ``dummy`` is not "done"; a pair of "No target found"
            strings when no row is flagged as target; otherwise an
            ``html.Div`` wrapping a graph or a short message.
        """
        # Raw string: '\d' in a plain literal is an invalid escape sequence.
        data_id = int(re.search(r'data/(\d+)', url).group(1))
        if dummy != "done":
            return []
        df = pd.read_pickle('cache/df' + str(data_id) + '.pkl')
        fi = pd.read_pickle('cache/fi' + str(data_id) + '.pkl')

        meta_data = pd.DataFrame(rows)
        try:
            target_rows = meta_data[meta_data["Target"] == "true"]
            target_attribute = target_rows["Attribute"].values[0]
            target_type = target_rows["DataType"].values[0]
        except IndexError:
            return "No target found", "No target found"

        if target_type == "nominal" or target_type == "string":
            y = pd.Categorical(df[target_attribute]).codes
        else:
            y = df[target_attribute]
        # Feature interaction plots
        df = clean_dataset(df)
        numerical_features = list(
            meta_data["Attribute"][meta_data["DataType"] == "numeric"])
        nominal_features = list(
            meta_data["Attribute"][meta_data["DataType"] == "nominal"])
        top_numericals = fi['index'][fi['index'].isin(numerical_features)][:5]
        top_nominals = fi['index'][fi['index'].isin(nominal_features)][:5]
        df['target'] = df[target_attribute]

        if target_type == "numeric":
            cmap_type = 'seq'
            # Bin the numeric target into 1000 intervals, then sort the rows
            # by each interval's left edge parsed out of its text form.
            df['target'] = y
            df['target'] = pd.cut(df['target'], 1000).astype(str)
            cat = df['target'].str.extract(r'\((.*),',
                                           expand=False).astype(float)
            df['bin'] = pd.Series(cat)
            df.sort_values(by='bin', inplace=True)
            df.drop('bin', axis=1, inplace=True)
        else:
            cmap_type = 'cat'
            try:
                df['target'] = df['target'].astype(int)
            except ValueError:
                print("target not converted to int")
            df.sort_values(by='target', inplace=True)
            df['target'] = df['target'].astype(str)

        if radio == "top":
            # Explicit copy so adding 'target' never writes through to df
            # and does not trigger SettingWithCopyWarning.
            top_features = df[fi['index'][0:5].values].copy()
            top_features['target'] = df['target']

            if len(top_numericals):
                matrix = ff.create_scatterplotmatrix(
                    top_features,
                    title='Top feature interactions',
                    diag='box',
                    index='target',
                    colormap_type=cmap_type,
                    height=800,
                    width=900)
                graph = dcc.Graph(figure=matrix)
            else:
                # NOTE(review): y was computed before clean_dataset() and the
                # sort above, so it may not line up row-for-row with
                # top_features - confirm.
                parcats = [
                    go.Parcats(
                        dimensions=[{
                            'label': column,
                            'values': list(top_features[column].values)
                        } for column in top_features.columns],
                        line={
                            'color': y,
                            'colorscale': 'Portland'
                        },
                        hoveron='color',
                        hoverinfo='count+probability',
                        arrangement='freeform')
                ]
                layout = go.Layout(autosize=False, width=1200, height=800)
                fig = go.Figure(data=parcats, layout=layout)
                graph = dcc.Graph(figure=fig)
        elif radio == "numeric":
            if len(top_numericals):
                df_num = df[top_numericals].copy()
                df_num['target'] = df['target']
                matrix = ff.create_scatterplotmatrix(
                    df_num,
                    title='Top numeric feature interactions',
                    diag='box',
                    index='target',
                    colormap_type=cmap_type,
                    height=1000,
                    width=1000)
                graph = dcc.Graph(figure=matrix)
            else:
                graph = html.P("No numericals found")
        elif radio == "nominal":
            if len(top_nominals):
                df_nom = df[top_nominals].copy()
                df_nom['target'] = df['target']

                parcats = [
                    go.Parcats(
                        dimensions=[{
                            'label': column,
                            'values': list(df_nom[column].values)
                        } for column in df_nom.columns],
                        line={
                            'color': pd.Categorical(df_nom['target']).codes,
                            'colorscale': 'Portland'
                        },
                        hoveron='color',
                        hoverinfo='count+probability',
                        arrangement='freeform')
                ]
                layout = go.Layout(autosize=False, width=1000, height=800)
                fig = go.Figure(data=parcats, layout=layout)
                graph = dcc.Graph(figure=fig)
            else:
                graph = html.P("No nominals found")
        else:
            # Previously an unrecognised option left ``graph`` unbound and the
            # return below raised UnboundLocalError.
            graph = html.P("Unknown feature selection")

        return html.Div(graph)
Exemple #6
0
def g_aluvial_cat(param_data, param_theme, param_dims):
    """
    Build an alluvial (parallel categories) figure coloured by category.

    Parameters
    ----------
    param_data : pd.DataFrame : data frame with the table to plot (table 3);
        the columns 'categoria', 'pais', 'frecuencia', 'tipo_1', 'tipo_2' and
        'tipo_3' are read, and a 'color' column is written to it
    param_theme : dict : theme dictionary for the visualizations; the keys
        'color_linea_1' .. 'color_linea_9' and 'color_texto_ejes' are read
    param_dims : dict : size dictionary for the visualizations; the 'width'
        and 'height' entries under 'figura_3' are read

    Returns
    -------
    fig_g_aluvial_cat : plotly : plotly figure object to be rendered

    Debugging
    ---------
    param_data = tabla_3
    param_theme = tema_base
    param_dims = dimensiones_base

    """

    # dimension: category
    categoria_dim = go.parcats.Dimension(
        values=param_data['categoria'],
        label='categoria')

    # dimension: country
    pais_dim = go.parcats.Dimension(
        values=param_data['pais'],
        label='pais')

    # dimension: frequency of occurrence
    frecuencia_dim = go.parcats.Dimension(
        values=param_data['frecuencia'],
        label='frecuencia')

    # dimension: presence of type 1 patterns
    tipo_1_dim = go.parcats.Dimension(
        values=param_data['tipo_1'],
        label="tipo_1",
        categoryarray=[0, 1],
        ticktext=['sin patron', 'con patron'])

    # dimension: presence of type 2 patterns
    tipo_2_dim = go.parcats.Dimension(
        values=param_data['tipo_2'],
        label="tipo_2",
        categoryarray=[0, 1],
        ticktext=['sin patron', 'con patron'])

    # dimension: presence of type 3 patterns
    tipo_3_dim = go.parcats.Dimension(
        values=param_data['tipo_3'],
        label="tipo_3",
        categoryarray=[0, 1],
        ticktext=['sin patron', 'con patron'])

    # line colour for each known category; anything else stays grey
    color_por_categoria = {
        'Tasas de interes': param_theme['color_linea_9'],
        'actividad economica': param_theme['color_linea_4'],
        'consumo': param_theme['color_linea_1'],
        'energia': param_theme['color_linea_7'],
        'flujos de capital': param_theme['color_linea_5'],
        'inflacion': param_theme['color_linea_6'],
        'mercado inmobiliario': param_theme['color_linea_2'],
        'mercado laboral': param_theme['color_linea_8'],
        'subasta de bonos': param_theme['color_linea_3'],
    }
    color_default = '#ABABAB'

    # One lookup per row replaces the per-row
    # ``param_data['color'].iloc[i] = ...`` writes: that chained assignment
    # is a silent no-op under pandas copy-on-write and slow otherwise.
    color = [color_por_categoria.get(categoria, color_default)
             for categoria in param_data['categoria']]

    # keep writing the colour column to the input frame, as before
    param_data['color'] = color

    # create the figure object
    fig_g_aluvial_cat = go.Figure()

    # add the alluvial (parallel categories) trace
    fig_g_aluvial_cat.add_trace(go.Parcats(
        dimensions=[categoria_dim, frecuencia_dim, pais_dim,
                    tipo_1_dim, tipo_2_dim, tipo_3_dim],
        line={'color': color},
        hoveron='color', hoverinfo='count+probability',
        labelfont={'size': 14, 'family': 'Times',
                   'color': param_theme['color_texto_ejes']},
        tickfont={'size': 14, 'family': 'Times',
                  'color': param_theme['color_texto_ejes']},
        arrangement='perpendicular'))

    # layout: margins, titles and axes
    fig_g_aluvial_cat.update_layout(
        margin=go.layout.Margin(l=100, r=25, b=5, t=25, pad=10),
        title=None)

    # size settings
    fig_g_aluvial_cat.layout.autosize = True
    fig_g_aluvial_cat.layout.width = param_dims['figura_3']['width']
    fig_g_aluvial_cat.layout.height = param_dims['figura_3']['height']

    return fig_g_aluvial_cat
Exemple #7
0
            dims = []
            for _, dim_key in enumerate(dim_parallel):
                dims.append(
                    go.parcats.Dimension(values=filtered_table[dim_key],
                                         label=dim_key))

            if c_key != 'None':
                unique_list = np.sort(filtered_table[c_key].unique())

                if np.issubdtype(unique_list.dtype, np.integer) or \
                        np.issubdtype(unique_list.dtype, np.floating):
                    parallel_fig = go.Figure(data=[
                        go.Parcats(dimensions=dims,
                                   line={
                                       'color': filtered_table[c_key],
                                       'colorbar': dict(title=c_key)
                                   },
                                   hoveron='color',
                                   hoverinfo='count+probability',
                                   arrangement='freeform')
                    ])
                else:
                    filtered_table['_C_'] = np.zeros_like(
                        filtered_table[c_key])
                    for idx, var in enumerate(unique_list):
                        filtered_table.loc[filtered_table[c_key] == var,
                                           '_C_'] = idx

                    parallel_fig = go.Figure(data=[
                        go.Parcats(dimensions=dims,
                                   line={'color': filtered_table['_C_']},
                                   hoverinfo='count+probability',