def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame, column_mapping):
        """Build the per-feature "big_table" widget and store it in ``self.wi``.

        For every selected feature one table row is produced. Each row links
        to a histogram of the feature coloured by the target column and/or
        the prediction column (whichever are available), faceted into
        "Reference" and "Production" panels.

        :param reference_data: reference dataset; also used to resolve the
            feature lists and column dtypes.
        :param production_data: production dataset, concatenated with the
            reference data for plotting.
        :param column_mapping: optional dict read with the keys ``'target'``,
            ``'prediction'``, ``'numerical_features'`` and
            ``'categorical_features'``. When falsy, columns are inferred from
            ``reference_data``.
        :return: ``None``. The result is written to ``self.wi``, which is set
            to ``None`` when neither a target nor a prediction column exists.
        """
        def keep_numeric(names):
            # A missing feature list means "no features of this kind".
            if names is None:
                return []
            return [name for name in names if is_numeric_dtype(reference_data[name])]

        if column_mapping:
            target_column = column_mapping.get('target')
            prediction_column = column_mapping.get('prediction')
            num_feature_names = keep_numeric(column_mapping.get('numerical_features'))
            # NOTE(review): categorical features are also filtered to numeric
            # dtypes, exactly as before; confirm this is intended.
            cat_feature_names = keep_numeric(column_mapping.get('categorical_features'))
        else:
            columns = reference_data.columns
            date_column = 'datetime' if 'datetime' in columns else None
            target_column = 'target' if 'target' in columns else None
            prediction_column = 'prediction' if 'prediction' in columns else None

            # None entries are harmless: they never match a column name.
            utility_columns = {date_column, target_column, prediction_column}

            num_feature_names = list(set(reference_data.select_dtypes([np.number]).columns) - utility_columns)
            # The builtin ``object`` replaces the ``np.object`` alias, which
            # no longer exists in current NumPy releases.
            cat_feature_names = list(set(reference_data.select_dtypes([object]).columns) - utility_columns)

        # (table title, graph id suffix, colouring column) for each available plot.
        plot_parts = []
        if target_column is not None:
            plot_parts.append(("Target", "_target_values", target_column))
        if prediction_column is not None:
            plot_parts.append(("Prediction", "_prediction_values", prediction_column))

        if not plot_parts:
            self.wi = None
            return

        feature_names = num_feature_names + cat_feature_names

        merged_data = None
        if feature_names:
            # Tag copies instead of the caller's frames, and build the merged
            # frame once: it does not depend on the feature being plotted.
            merged_data = pd.concat([
                reference_data.assign(dataset='Reference'),
                production_data.assign(dataset='Production'),
            ])

        additional_graphs_data = []
        params_data = []
        for feature_name in feature_names:
            # add data for table in params
            params_data.append(
                {
                    "details": {
                        "parts": [
                            {"title": title, "id": feature_name + suffix}
                            for title, suffix, _ in plot_parts
                        ],
                        "insights": []
                    },
                    "f1": feature_name
                }
            )

            # write plot data in table as additional data
            for _, suffix, color_column in plot_parts:
                fig = px.histogram(merged_data, x=feature_name, color=color_column, facet_col="dataset",
                                   category_orders={"dataset": ["Reference", "Production"]})
                fig_json = json.loads(fig.to_json())
                additional_graphs_data.append(
                    AdditionalGraphInfo(
                        feature_name + suffix,
                        {
                            "data": fig_json['data'],
                            "layout": fig_json['layout']
                        },
                    )
                )

        self.wi = BaseWidgetInfo(
            title=self.title,
            type="big_table",
            details="",
            alertStats=AlertStats(),
            alerts=[],
            alertsPosition="row",
            insights=[],
            size=2,
            params={
                "rowsPerPage": min(len(feature_names), 10),
                "columns": [
                    {
                        "title": "Feature",
                        "field": "f1"
                    }
                ],
                "data": params_data
            },
            additionalGraphs=additional_graphs_data
        )
Ejemplo n.º 2
0
 def histogram(self, title='', bins=50, color='blue', metric=''):
     """Show a single-colour histogram of ``metric``.

     The rows are ordered with ``self.sort_values(metric)`` before being
     handed to ``px.histogram``; the figure is displayed, not returned.
     """
     ordered_rows = self.sort_values(metric)
     figure = px.histogram(
         ordered_rows,
         x=metric,
         title=title,
         nbins=bins,
         color_discrete_sequence=[color],
     )
     figure.show()
Ejemplo n.º 3
0
# Streamlit page section: column picker, data preview, MEDV histogram for a
# user-selected range, and sidebar inputs.
cols = st.multiselect("Atributos", df.columns.tolist(), default=default_cols)

# show the first 10 rows of the selected columns
st.dataframe(df[cols].head(10))

st.subheader("Distribuição de imóveis por preço")

# define the value range (slider from the MEDV minimum to 150, default 10-100)
faixa_valores = st.slider("Faixa de preço", float(df.MEDV.min()), 150.,
                          (10.0, 100.0))

# filter the data to the selected range
dados = df[df.MEDV.between(left=faixa_valores[0], right=faixa_valores[1])]

# plot the distribution of the filtered data
f = px.histogram(dados, x='MEDV', nbins=100, title='Distribuição de Preços')
f.update_xaxes(title='MEDV')
f.update_yaxes(title='Total Imóveis')
st.plotly_chart(f)

st.sidebar.subheader("Defina os atributos do imóvel para predição")

# map the user's input to each attribute (column means are the defaults)
crim = st.sidebar.number_input("Taxa de criminalidade", value=df.CRIM.mean())
indus = st.sidebar.number_input("Proporção de hectares de negócio",
                                value=df.INDUS.mean())
chas = st.sidebar.selectbox("Faz limite com o rio?", ('Sim', "Não"))

# convert the selected option into a binary value
chas = 1 if chas == 'Sim' else 0
Ejemplo n.º 4
0
def update_graph2(chosen_column):
    """Return a histogram of the first 1000 values of ``df[chosen_column]``."""
    first_rows = df[chosen_column][:1000]
    return px.histogram(first_rows, x=chosen_column)
Ejemplo n.º 5
0
def update_histogram(x_data, y_data, func):
    """Return a histogram of ``df_pu`` aggregated with ``func``.

    The x-axis categories are ordered by descending sum.
    """
    figure = px.histogram(df_pu, x=x_data, y=y_data, histfunc=func)
    axis_options = {'categoryorder': 'sum descending'}
    figure.update_layout(xaxis=axis_options)
    return figure
Ejemplo n.º 6
0
# Streamlit page section: data preview, MEDV histogram for a user-selected
# range, and sidebar inputs.

# show the first 10 rows of the selected columns
st.dataframe(data[cols].head(10))

st.subheader("Distribuição de imóveis por preço")

# define the value range (slider from the MEDV minimum to 150, default 10-100)
faixa_valores = st.slider("Faixa de preço", float(data.MEDV.min()), 150.,
                          (10.0, 100.0))

# filter the data to the selected range
dados = data[data['MEDV'].between(left=faixa_valores[0],
                                  right=faixa_valores[1])]

# plot the distribution of the filtered data
f = px.histogram(dados, x="MEDV", nbins=100, title="Distribuição de Preços")
f.update_xaxes(title="MEDV")
f.update_yaxes(title="Total Imóveis")
st.plotly_chart(f)

st.sidebar.subheader("Defina os atributos do imóvel para predição")

# map the user's input to each attribute (column means are the defaults)
crim = st.sidebar.number_input("Taxa de Criminalidade", value=data.CRIM.mean())
# The default must come from INDUS itself; it previously reused the CRIM mean.
indus = st.sidebar.number_input("Proporção de Hectares de Negócio",
                                value=data.INDUS.mean())
chas = st.sidebar.selectbox("Faz limite com o rio?", ("Sim", "Não"))

# convert the selected option into a binary value
chas = 1 if chas == "Sim" else 0
Ejemplo n.º 7
0
def _show_model_section(label, score, report, model, le, prepare_input=None):
    """Render a fitted model's score and report, plus an optional user prediction.

    :param label: model name inserted into the on-page captions.
    :param score: accuracy value written next to a "%" sign.
    :param report: report object passed to ``st.write``.
    :param model: fitted estimator; only ``predict`` is called on it.
    :param le: encoder whose ``inverse_transform`` maps predictions back to
        the original class values.
    :param prepare_input: optional callable applied to the user's input
        before it is passed to ``model.predict``.
    """
    st.text(f"Accuracy of {label} model is: ")
    st.write(score, "%")
    st.text(f"Report of {label} model is: ")
    st.write(report)

    try:
        if st.checkbox(
                "Want to predict on your own Input? It is recommended to have a look at dataset to enter values in below tabs than just typing in random values"
        ):
            user_prediction_data = accept_user_data()
            if prepare_input is not None:
                user_prediction_data = prepare_input(user_prediction_data)
            pred = model.predict(user_prediction_data)
            st.write(
                "The Predicted Class is: ", le.inverse_transform(pred)
            )  # Inverse transform to get the original dependent value.
    except Exception as err:
        # Prediction on free-form input stays best-effort, but the failure is
        # reported instead of hidden. A bare ``except`` would also trap
        # KeyboardInterrupt and SystemExit.
        st.warning(f"Could not compute a prediction for this input: {err}")


def main():
    """Run the Streamlit demo: data preview, model selection and charts.

    Loads and preprocesses the data, optionally shows the raw rows, trains
    and reports the model chosen in the sidebar, draws the map, and renders
    the histogram selected in the sidebar.
    """
    st.title(
        "Prediction of Trip History Data using various Machine Learning Classification Algorithms- A Streamlit Demo!"
    )
    data = loadData()
    X_train, X_test, y_train, y_test, le = preprocessing(data)

    # Insert Check-Box to show the snippet of the data.
    if st.checkbox('Show Raw Data'):
        st.subheader("Showing raw data---->>>")
        st.write(data.head())

    # ML Section
    choose_model = st.sidebar.selectbox(
        "Choose the ML Model",
        ["NONE", "Decision Tree", "Neural Network", "K-Nearest Neighbours"])

    if choose_model == "Decision Tree":
        score, report, tree = decisionTree(X_train, X_test, y_train, y_test)
        _show_model_section("Decision Tree", score, report, tree, le)

    elif choose_model == "Neural Network":
        score, report, clf = neuralNet(X_train, X_test, y_train, y_test)

        def scale_like_training(rows):
            # Fit the scaler on the training features, then scale the user's input.
            scaler = StandardScaler()
            scaler.fit(X_train)
            return scaler.transform(rows)

        _show_model_section("Neural Network", score, report, clf, le,
                            prepare_input=scale_like_training)

    elif choose_model == "K-Nearest Neighbours":
        score, report, clf = Knn_Classifier(X_train, X_test, y_train, y_test)
        _show_model_section("K-Nearest Neighbour", score, report, clf, le)

    # Visualization Section
    plotData = showMap()
    st.subheader(
        "Bike Travel History data plotted-first few locations located near Washington DC"
    )
    st.map(plotData, zoom=14)

    # Sidebar option -> column plotted as a histogram.
    viz_columns = {
        "Total number of vehicles from various Starting Points": 'Start station',
        "Total number of vehicles from various End Points": 'End station',
        "Count of each Member Type": 'Member type',
    }
    choose_viz = st.sidebar.selectbox("Choose the Visualization",
                                      ["NONE"] + list(viz_columns))

    column = viz_columns.get(choose_viz)
    if column is not None:
        fig = px.histogram(data[column], x=column)
        st.plotly_chart(fig)
Ejemplo n.º 8
0
# Streamlit page: three random plates, summary statistics, two histograms
# and the full table.
st.markdown("# Rejected CA License Plates")
df = fetch_data()

# Three randomly sampled rows, rendered as markdown cards.
st.markdown("## Random")
random_set = df.sample(3).to_dict(orient='records')
for row in random_set:
    st.markdown(f'''
        ## {row["plate"]}
        * Customer Explanation: {row["customer_meaning"]}
        * Flag Reason: {row["reason"]}
        * Reviewer Comments: {row["reviewer_comments"]}
        * __{"REJECTED" if row["status"] == "N" else "APPROVED"}__
''')
st.button("Try Another!")

st.markdown("## Summary Stats")
summary_stats = df.describe()
st.write(summary_stats)

# Only rows whose status is "Y" or "N" are counted here.
st.markdown("## Approvals")
decided_rows = df.query('status in ["Y", "N"]')
approval_fig = px.histogram(decided_rows, x='status', title='Approved?')
st.plotly_chart(approval_fig)

st.markdown("## Common Flags")
flag_fig = px.histogram(df, x='reason', title='Flagging')
st.plotly_chart(flag_fig)

st.markdown("## Data")
st.dataframe(df, width=1000, height=1200)
Ejemplo n.º 9
0
# visit http://127.0.0.1:8050/ in your web browser.

import dash
import dash_core_components as dcc
import dash_html_components as html
import plotly.express as px
import pandas as pd

# Hosted stylesheet passed to Dash as an external stylesheet.
external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']

app = dash.Dash(__name__, external_stylesheets=external_stylesheets)
# Palette shared by the figure (background, font) and the page layout.
colors = {'background': '#111111', 'text': '#7FDBFF'}

# NOTE: relative path, resolved against the current working directory.
df = pd.read_csv("data/googleplaystore.csv")

# Histogram of the "Rating" column with the x axis limited to [0.8, 5.2].
fig = px.histogram(df, x="Rating", range_x=[0.8, 5.2])
# Apply the palette to the plot area, the surrounding paper and the font.
fig.update_layout(plot_bgcolor=colors['background'],
                  paper_bgcolor=colors['background'],
                  font_color=colors['text'])

app.layout = html.Div(style={'backgroundColor': colors['background']},
                      children=[
                          html.H1(children='Dashboard aplicaciones Android',
                                  style={
                                      'textAlign': 'center',
                                      'color': colors['text']
                                  }),
                          html.Div(children='''
                Primera app de aprendizaje con Dash
            ''',
                                   style={
Ejemplo n.º 10
0
def cluster(key, maxClusters):
    """Fit diagonal-covariance GMMs with 1..maxClusters components.

    For each component count the model is fitted on the columns
    ``featureDict[key]`` of ``normData``. Its BIC and score are recorded,
    the predicted cluster per row is stored, and three files are written to
    the ``GMM_clustering_<key>`` folder: the per-cluster means as CSV, a
    line plot of those means as HTML, and an ``sf94`` histogram per cluster
    as HTML.

    :param key: name of the feature set in ``featureDict``; also used in the
        output folder and column names.
    :param maxClusters: largest number of mixture components to fit.
    :return: tuple ``(summaryStats, predictions)``.
    """
    summaryStats = pd.DataFrame(columns=['BIC', 'Likelihood'])
    predictions = pd.DataFrame(normData['subjid'])
    variables = featureDict[key]

    for n_components in range(1, maxClusters + 1):
        features = normData[variables]

        # Fit the mixture model for this component count
        mixture = GaussianMixture(
            n_components=n_components,
            covariance_type='diag',
            random_state=0,
        )
        gmm = mixture.fit(features)

        # BIC and score of the fitted model
        bic_value = gmm.bic(features)
        score_value = gmm.score(normData[variables])
        summaryStats.loc[n_components, :] = [bic_value, score_value]

        clusteringName = key + str(n_components)

        # Cluster label per row, then the mean of allVarsNeth within each cluster
        predictions[clusteringName] = gmm.predict(normData[variables])
        normData_pred = pd.merge(normData, predictions, on='subjid')
        wanted_columns = allVarsNeth.union([clusteringName])
        means = normData_pred[wanted_columns].groupby([clusteringName]).mean().T

        # Line plot of the per-cluster means
        figure = px.line(
            means,
            x=means.index,
            y=means.columns,
            template="simple_white",
        )

        savepath = root + 'stevenkerr/Git/wp5-clustering/Code/clinical/GMM_clustering_' + key
        means_stem = savepath + '/means' + str(n_components)

        means.to_csv(means_stem + '.csv')
        pio.write_html(figure, file=means_stem + '.html', auto_open=False)

        # Histogram of sf94 by cluster
        data_pred = pd.merge(data, predictions, on='subjid')
        histFig = px.histogram(
            data_pred,
            x='sf94',
            color=clusteringName,
            nbins=10,
            barmode='group',
            histnorm='probability density',
        )
        pio.write_html(
            histFig,
            file=savepath + '/sf94' + str(n_components) + '.html',
            auto_open=False,
        )

    return (summaryStats, predictions)
Ejemplo n.º 11
0
if part == 'Introduction':
    st.title('Flight Delay Analysis')

    st.markdown('''
                ## Structure of Notebook
                1. Basic Data Introduction
                2. Analyze columns with a lot of null values
                3. Flight Distance Analysis
                4. Departure Delay Temporal Analysis
                5. Analysis of Categorical Variables of Interest
                6. Final conclusions
                ''')

    st.title('Basic Introduction')

    fig = px.histogram(data_frame=df, x='arr_delay')
    st.plotly_chart(fig, use_container_width=True)

    #%%
    fig1 = px.box(data_frame=df, x='arr_delay')
    st.plotly_chart(fig1, use_container_width=True)
    # %%
    st.markdown('''
        ## Initial Data Introduction
        - Inital look at data tells us that flight delays are very skewed.
        - Majority of flights are expected to be early, or on time as the median fligth delay is `-6`
        - Although most flights make good time. Their seems to be a tendancy for extreme outliers.        
                ''')
#%%
elif part == 'Analyze columns with a lot of null values':
Ejemplo n.º 12
0
def variables():
    """
    Describe the columns of the CSV and build the charts for the page.

    Computes per-year totals, means and standard deviations, the share of
    each theme in the overall total, and per-channel mean, standard
    deviation and sum. Builds a line chart, a pie chart and a histogram
    from those values.

    Columns are selected by position. NOTE(review): this presumably relies
    on the layout date, theme, six channel columns, "Totaux" - confirm
    against the CSV.

    :return: The rendered ``variables.html`` template.
    """

    # Adjust the path for your machine
    df = pd.read_csv(
        "ina-barometre-jt-tv-donnees-mensuelles-2005-2018-nombre-de-sujets.csv",
        encoding='1252', delimiter=';')

    # Parse the date column
    df["MOIS"] = pd.to_datetime(df["MOIS"], format="%d/%m/%Y")

    ################################################################
    # Description of the year variable

    df["ANNEE"] = pd.DatetimeIndex(df["MOIS"]).year
    # Positions 8 and 9: the column summed below as "Totaux" and the new "ANNEE".
    per_year = df.iloc[:, [8, 9]].groupby(["ANNEE"])
    date_toto = per_year.sum()
    date_mean = round(per_year.mean(), 2)
    date_std = round(per_year.std(), 2)

    ################################################################
    # Description of the theme variable

    # numeric_only=True keeps the datetime column "MOIS" out of the sum.
    # Summing datetimes raises on current pandas; older releases dropped the
    # column silently, which is what the positional index 6 relies on.
    thematique_grouped = df.groupby("THEMATIQUES").sum(numeric_only=True)
    grouped_total = thematique_grouped.iloc[:, 6]
    effectif_thematique = grouped_total.sum()
    percent_tot = round((grouped_total / effectif_thematique) * 100, 2)

    ################################################################
    # Description of the channel variables

    chaines = df.iloc[:, 2:8]
    mean_tot_chaine = round(chaines.mean(), 2)
    std_tot_chaine = round(chaines.std(), 2)
    sum_tot_chaine = chaines.sum()

    # Transparent backgrounds shared by all three figures
    transparent_background = {
        'plot_bgcolor': 'rgba(0, 0, 0, 0)',
        'paper_bgcolor': 'rgba(0, 0, 0, 0)',
    }

    ################################################################
    # Chart for the year variable

    fig_date_toto = px.line(date_toto, x=date_toto.index, y=date_toto["Totaux"],
                            labels={"ANNEE": "", "Totaux": ""})
    fig_date_toto.update_traces(mode='markers+lines')
    fig_date_toto.update_layout(transparent_background)

    figure_date_toto = to_json(fig_date_toto)

    ################################################################
    # Chart for the theme variable
    fig = px.pie(grouped_total, names=grouped_total.index, values="Totaux")
    fig.update_layout(transparent_background)
    fig.update_layout(legend=dict(
        yanchor="top",
        y=0.99,
        xanchor="left",
        x=0
    ))
    figure_thematique = to_json(fig)

    ################################################################
    # Chart for the channel variables
    fig_tot_chaine = px.histogram(std_tot_chaine, x=std_tot_chaine.index, y=sum_tot_chaine,
                                  color=std_tot_chaine.index,
                                  labels={
                                      "index": "",
                                      "count": "",
                                      "sum of y": "",
                                      "y": ""
                                  })
    fig_tot_chaine.update_layout(transparent_background)
    figure_chaines = to_json(fig_tot_chaine)

    return render_template("variables.html", date_mean=date_mean, date_std=date_std,
                           grouped_total=grouped_total, percent_tot=percent_tot, mean_tot_chaine=mean_tot_chaine,
                           std_tot_chaine=std_tot_chaine, figure_date_toto=figure_date_toto,
                           figure_thematique=figure_thematique, figure_chaines=figure_chaines)
Ejemplo n.º 13
0
# Dash page with three sales charts and a button that opens the browser's
# print dialog through a clientside callback.
df = pd.read_csv(
    'https://raw.githubusercontent.com/Coding-with-Adam/Dash-by-Plotly/master/Callbacks/Client-side-callback/opsales.csv'
)

external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']
app = dash.Dash(__name__, external_stylesheets=external_stylesheets)


def _sales_pie(names_column):
    """Return a pie chart of "Sales" split by ``names_column``, labels on the slices."""
    pie = px.pie(df, names=names_column, values="Sales")
    return pie.update_traces(textinfo='label+percent', showlegend=False)


page_children = [
    html.Button('Print Graphs', id='printing'),
    html.Div(id='hidden-content'),
    html.H1('Analysis of Store Sales', style={'textAlign': 'center'}),
    dcc.Graph(id='one', figure=_sales_pie("Shipping Mode")),
    dcc.Graph(id='two', figure=px.histogram(df, x="Order Status", y="Sales")),
    dcc.Graph(id='thr', figure=_sales_pie("Customer Segment")),
]
app.layout = html.Div(page_children)

# The JavaScript runs in the browser: once the button has been clicked it
# calls window.print(); the hidden div only serves as the callback output.
app.clientside_callback(
    """
    function(clicks) {
        if (clicks > 0) {
          window.print()
        }
        return ""
    }
    """, Output('hidden-content', 'children'), Input('printing', 'n_clicks'))
Ejemplo n.º 14
0
    def make_grouped_plot(
        self,
        ensembles: list,
        prop: str,
        selector_values: List[Any],
        statistic: str = "Avg",
        plot_type: str = "histogram",
    ) -> go.Figure:
        """Build a faceted plot of ``statistic`` for one property.

        The rows of ``self.dataframe`` are restricted to ``prop``, filtered
        by the selector values (when given) and by ``ensembles``, then drawn
        with the plotly express function matching ``plot_type``.

        :param ensembles: values of the "ENSEMBLE" column to keep.
        :param prop: value of the "PROPERTY" column to keep.
        :param selector_values: one collection of selected values per
            selector, or ``None`` to skip selector filtering.
        :param statistic: column plotted on the value axis.
        :param plot_type: one of "histogram", "bar", "scatter" or
            "scatter_ensemble".
        :return: the figure as a dict with a themed layout. When the
            selections would produce more than 50 combinations (for any
            plot type except "scatter_ensemble") a dict holding only a
            layout title that asks for fewer selections.
        :raises ValueError: if ``plot_type`` is not supported.
        """
        supported_plot_types = ("histogram", "bar", "scatter", "scatter_ensemble")

        # Number of selector combinations; ``None`` means no selectors at all.
        sel_length = 1
        if selector_values is not None:
            for selector in selector_values:
                sel_length *= len(selector)
        if sel_length > 50 and plot_type != "scatter_ensemble":
            return {
                "layout": {
                    "title":
                    "Reduce number of filter selections to display graph",
                }
            }
        # Reject unknown plot types up front instead of failing later on an
        # unassigned figure.
        if plot_type not in supported_plot_types:
            raise ValueError(
                f"Unsupported plot_type {plot_type!r}; expected one of {supported_plot_types}"
            )

        df = self.dataframe.copy()
        df = df[df["PROPERTY"] == prop]
        if selector_values is not None:
            df = self.filter_dataframe(df, self.selectors, selector_values)

        df = df[df["ENSEMBLE"].isin(ensembles)]
        if plot_type == "histogram":
            fig = px.histogram(
                df,
                x=statistic,
                nbins=20,
                facet_col="label",
                facet_col_wrap=5,
                color="ENSEMBLE",
                barmode="stack",
                color_discrete_sequence=self.colorway,
            )
        elif plot_type == "bar":
            fig = px.bar(
                df,
                y=statistic,
                x="REAL",
                facet_col="label",
                facet_col_wrap=5,
                color="ENSEMBLE",
                barmode="group",
                color_discrete_sequence=self.colorway,
            )
        elif plot_type == "scatter":
            fig = px.scatter(
                df,
                y=statistic,
                x="REAL",
                facet_col="label",
                facet_col_wrap=5,
                color="ENSEMBLE",
                color_discrete_sequence=self.colorway,
            )
        else:  # "scatter_ensemble": facet by ensemble, colour by label
            fig = px.scatter(
                df,
                y=statistic,
                x="REAL",
                facet_col="ENSEMBLE",
                facet_col_wrap=5,
                color="label",
                color_discrete_sequence=self.colorway,
            )
        fig = fig.to_dict()
        fig["layout"] = self.theme.create_themed_layout(fig["layout"])
        return fig
Ejemplo n.º 15
0
def _apply_choices(form, choices_by_field):
    """Install the runtime choice lists on *form* and return it.

    ``choices_by_field`` maps a form field name to the list of
    ``(value, label)`` pairs that the field should offer.
    """
    for field_name, choices in choices_by_field.items():
        form.fields[field_name].choices = choices
    return form


def _form_definitions(features, categories, numerics):
    """Return ``{context_key: (form_class, choices_by_field)}`` for each plot form.

    The keys are the names under which the forms are exposed to the template.
    """
    xy_facets = {
        'x': features,
        'y': features,
        'facet_row': categories,
        'facet_col': categories,
        'color': categories,
    }
    return {
        'scatter': (Scatter_form, dict(xy_facets, size=numerics)),
        'scatter_3d': (Scatter_3d_form, {
            'x': features,
            'y': features,
            'z': features,
            'color': categories,
            'size': numerics,
        }),
        'line': (Line_form, xy_facets),
        'bar': (Bar_form, xy_facets),
        'pie': (Pie_form, {
            'values': features,
            'names': categories,
            'color': categories,
        }),
        'histogram': (Histogram_form, xy_facets),
        'scatter_matrix': (Scatter_matrix_form, {
            'feature_1': numerics,
            'feature_2': numerics,
            'feature_3': numerics,
            'feature_4': numerics,
            'color': categories,
            'size': numerics,
            'symbol': categories,
        }),
        'box': (Box_form, xy_facets),
        'violin': (Violin_form, xy_facets),
    }


def _plot_specs():
    """Return ``(button, form_key, plot_function, spec)`` for each form-driven plot.

    Each ``spec`` describes how validated form data maps onto the Plotly call:

    * ``required``    - column fields always requested and passed as-is;
    * ``dimensions``  - column fields passed together as ``dimensions=[...]``;
    * ``optional``    - column fields that may be blank (blank -> ``None``);
    * ``passthrough`` - fields forwarded under the same keyword;
    * ``renamed``     - fields forwarded under a different keyword;
    * ``constants``   - fixed keyword arguments.

    Built inside a function so ``px`` is only looked up at request time.
    The tuple order is the order in which submit buttons are processed.
    """
    facets = ('color', 'facet_row', 'facet_col')
    tall = {'height': 800}
    return (
        ('scatter_btn', 'scatter', px.scatter, {
            'required': ('x', 'y'),
            'optional': ('size',) + facets,
            'passthrough': ('facet_col_wrap', 'title', 'log_x', 'log_y',
                            'render_mode'),
            'constants': tall,
        }),
        ('scatter_3d_btn', 'scatter_3d', px.scatter_3d, {
            'required': ('x', 'y', 'z'),
            'optional': ('size', 'color'),
            'passthrough': ('title', 'log_x', 'log_y', 'log_z'),
            'constants': tall,
        }),
        ('line_btn', 'line', px.line, {
            'required': ('x', 'y'),
            'optional': facets,
            'passthrough': ('facet_col_wrap', 'title'),
            'constants': tall,
        }),
        ('bar_btn', 'bar', px.bar, {
            'required': ('x', 'y'),
            'optional': facets,
            'passthrough': ('facet_col_wrap', 'title', 'orientation'),
            'renamed': {'bar_mode': 'barmode'},
            'constants': tall,
        }),
        ('pie_btn', 'pie', px.pie, {
            'required': ('values', 'names'),
            'optional': ('color',),
            'passthrough': ('title',),
            'constants': tall,
        }),
        ('histogram_btn', 'histogram', px.histogram, {
            'required': ('x', 'y'),
            'optional': facets,
            'passthrough': ('facet_col_wrap', 'title', 'orientation',
                            'marginal', 'log_x', 'log_y', 'cumulative'),
            'renamed': {
                'bar_mode': 'barmode',
                'bar_norm': 'barnorm',
                'hist_norm': 'histnorm',
                'hist_func': 'histfunc',
            },
            'constants': tall,
        }),
        ('scatter_matrix_btn', 'scatter_matrix', px.scatter_matrix, {
            'dimensions': ('feature_1', 'feature_2', 'feature_3',
                           'feature_4'),
            # 'symbol' is listed so its column is actually requested; it was
            # previously passed to Plotly without being part of the data.
            'optional': ('size', 'color', 'symbol'),
            'passthrough': ('title',),
        }),
        ('box_btn', 'box', px.box, {
            # 'color' is now forwarded to Plotly; it used to be read from the
            # form and then ignored.
            'optional': ('x', 'y') + facets,
            'passthrough': ('facet_col_wrap', 'title', 'orientation',
                            'log_x', 'log_y', 'points', 'notched'),
            'renamed': {'box_mode': 'boxmode'},
        }),
        ('violin_btn', 'violin', px.violin, {
            'optional': ('x', 'y') + facets,
            'passthrough': ('facet_col_wrap', 'title', 'orientation',
                            'log_x', 'log_y', 'points', 'box'),
            'renamed': {'violin_mode': 'violinmode'},
        }),
    )


def _plot_arguments(cleaned, spec):
    """Translate validated form data into ``(columns, plotly_kwargs)``.

    ``columns`` lists, in order, the feature names whose data must be loaded;
    ``plotly_kwargs`` are the keyword arguments for the Plotly Express call
    (without ``data_frame``). Blank optional selections become ``None`` so
    Plotly ignores them and no column is requested for them.
    """
    columns = []
    kwargs = {}

    for field in spec.get('required', ()):
        columns.append(cleaned[field])
        kwargs[field] = cleaned[field]

    dimensions = [cleaned[field] for field in spec.get('dimensions', ())]
    if dimensions:
        columns.extend(dimensions)
        kwargs['dimensions'] = dimensions

    for field in spec.get('optional', ()):
        value = cleaned[field] if cleaned[field] else None
        if value is not None:
            columns.append(value)
        kwargs[field] = value

    for field in spec.get('passthrough', ()):
        kwargs[field] = cleaned[field]
    for field, keyword in spec.get('renamed', {}).items():
        kwargs[keyword] = cleaned[field]

    kwargs.update(spec.get('constants', {}))
    return columns, kwargs


def plot_graphs(request):
    """Render the plotting page and, on POST, the requested Plotly figure.

    Each plot type has its own form and submit button. When a button is
    present in ``request.POST`` the matching form is validated, the selected
    feature columns are loaded from the module-level ``ppd`` object and the
    corresponding Plotly Express function is called. ``heat_map_btn`` needs
    no form and plots ``ppd.get_corr_matrix()``.

    The template context always contains fresh, unbound instances of every
    form, plus ``fig``: the figure as an HTML fragment, an error message when
    building the figure failed, or ``None`` when nothing was plotted.

    Args:
        request: the incoming Django request.

    Returns:
        The response produced by ``render`` for
        ``data_cleaning_app/plot_graphs.html``.
    """
    import logging

    logger = logging.getLogger(__name__)

    blank_choice = (None, '---------')
    features = [(name, name) for name in ppd.getFeatureName()]
    categories = [(name, name) for name in ppd.get_category_list()]
    numerics = [(name, name) for name in ppd.get_numeric_features_name()]
    for choices in (features, categories, numerics):
        choices.append(blank_choice)

    forms = _form_definitions(features, categories, numerics)

    fig = None
    fig_error = False

    if request.method == 'POST':
        # Buttons are checked independently and in a fixed order, so if a
        # request carries several of them the last one processed wins.
        for button, form_key, plot_fn, spec in _plot_specs():
            if button not in request.POST:
                continue

            form_class, choices_by_field = forms[form_key]
            form = _apply_choices(form_class(request.POST), choices_by_field)
            if not form.is_valid():
                logger.debug("%s form invalid: %s", form_key, form.errors)
                continue

            columns, plot_kwargs = _plot_arguments(form.cleaned_data, spec)
            data = pd.DataFrame(ppd.get_features_data(columns))
            try:
                fig = plot_fn(data_frame=data, **plot_kwargs)
            except Exception:
                # Plotly rejects many parameter combinations; report that to
                # the user instead of failing the whole request.
                logger.exception("Could not build %s figure", form_key)
                fig_error = True
                fig = None

        if 'heat_map_btn' in request.POST:
            heat_map_data = pd.DataFrame(ppd.get_corr_matrix())
            try:
                fig = px.imshow(heat_map_data.astype(float),
                                x=heat_map_data.columns,
                                y=heat_map_data.index,
                                zmax=1,
                                zmin=-1,
                                height=800)
            except Exception:
                logger.exception("Could not build heat map figure")
                fig_error = True
                fig = None

    # The page is always re-rendered with unbound forms, whatever was posted.
    context = {
        form_key: _apply_choices(form_class(), choices_by_field)
        for form_key, (form_class, choices_by_field) in forms.items()
    }
    if fig is not None:
        context['fig'] = pio.to_html(fig=fig,
                                     full_html=False,
                                     include_plotlyjs=False)
    elif fig_error:
        context[
            'fig'] = "Plot Graph Error When Setting Parameters. Please Try Again!"
    else:
        context['fig'] = None
    return render(request,
                  'data_cleaning_app/plot_graphs.html',
                  context=context)
Ejemplo n.º 16
0
def fun_monit(score_pred, score_mod):
    """Serve a Dash page with two dark-themed ``score_1`` histograms.

    The first panel is built from ``score_mod`` and the second from
    ``score_pred``. This call blocks: it starts the Dash server on port 8050.

    Args:
        score_pred: data passed to ``px.histogram`` for the second panel;
            must expose a ``score_1`` column.
        score_mod: data passed to ``px.histogram`` for the first panel;
            must expose a ``score_1`` column.
    """
    palette = {
        'background': '#111111',
        'text': '#7FDBFF'
    }
    # Same text as the original triple-quoted literal, whitespace included.
    blurb = (
        '\n'
        '                Dash: A web application framework for Python.\n'
        '            '
    )

    def themed_histogram(frame):
        # Overlay histogram of score_1, recoloured with the dark palette.
        figure = px.histogram(frame, x='score_1', barmode="overlay")
        figure.update_layout(
            plot_bgcolor=palette['background'],
            paper_bgcolor=palette['background'],
            font_color=palette['text']
        )
        return figure

    def section(heading, graph_id, figure):
        # One page row: heading, blurb and the graph itself.
        return html.Div([
            html.H1(children=heading),
            html.Div(children=blurb),
            dcc.Graph(id=graph_id, figure=figure),
        ])

    app = dash.Dash(
        __name__,
        external_stylesheets=['https://codepen.io/chriddyp/pen/bWLwgP.css'])

    model_figure = themed_histogram(score_mod)
    prediction_figure = themed_histogram(score_pred)

    app.layout = html.Div(children=[
        section('Histograma de Scores Modelo', 'graph1', model_figure),
        section('Histograma de Scores Predicciones', 'graph2',
                prediction_figure),
    ])
    # NOTE(review): debug=True while listening on every interface exposes the
    # debugger to the network - confirm this is only ever run in development.
    app.run_server(host='0.0.0.0', port=8050, debug=True)
fig.show()

# In[23]:

# Positions where the cluster label is -1.
# NOTE(review): presumably `clusters` holds DBSCAN labels, where -1 marks
# noise points - confirm against the cell that computes it.
outliers = np.where(clusters == -1)
# Drop those rows from features and targets. `drop` works on index labels, so
# this assumes both frames use a default 0..n-1 index - TODO confirm.
df_X_db = df_X.drop(list(outliers[0]))
df_Y_db = df_Y.drop(list(outliers[0]))
# Join the remaining features and targets side by side and save them to CSV.
# `result` is a second name bound to the same frame.
df_dbScan = result = pd.concat([df_X_db, df_Y_db], axis=1, sort=False)
df_dbScan.to_csv(r'Filtered_DBSCAN.csv', index=False, header=True)
print(df_dbScan.head())

# In[24]:

# Count of rows per `sex_b` value, drawn once with seaborn and once with
# Plotly.
sns.countplot(x='sex_b', data=df_dbScan)

fig = px.histogram(df_dbScan, x="sex_b", color="sex_b")
fig.update_layout(barmode='group')
fig.show()

# In[25]:

# Hold out 33% of the filtered rows for testing; the fixed random_state keeps
# the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(df_X_db,
                                                    df_Y_db,
                                                    test_size=.33,
                                                    random_state=123)
# NOTE(review): LazyClassifier looks like lazypredict's model-comparison
# helper - confirm the import. `fit` returns two objects, bound here to
# `models` and `predictions`.
clf = LazyClassifier(verbose=0, ignore_warnings=True, custom_metric=None)
models, predictions = clf.fit(X_train, X_test, y_train, y_test)
# Bare expression so the notebook displays `models` as the cell output.
models

# ## Isolation Forests
Ejemplo n.º 18
0
                  className="pretty_container three columns",
                  style={
                      "width": "inherit",
                  }),
     ],
     className="row container-display",
     style={
         # "margin-left":"-60px",
         "width": "100%"
     }),
 html.Div(
     [
         html.Div([
             dcc.Graph(figure=px.histogram(
                 df,
                 x='Subjectivity',
                 title='Histogram on Subjectivity of Sentiments',
                 nbins=20,
             ))
         ],
                  className="pretty_container three columns",
                  style={
                      "width": "inherit",
                  }),
         html.Div([
             dcc.Graph(figure=px.histogram(
                 df,
                 x='Sentiments',
                 title='Histogram of frequency distribution Sentiments',
             ))
         ],
                  className="pretty_container three columns",
Ejemplo n.º 19
0
import csv

import plotly.express as px

# Load every record of the semicolon-separated file; the first record holds
# the column headers.
with open('задание 14.csv', newline='') as csvfile:
    rows = list(csv.reader(csvfile, delimiter=';'))

# Transpose the first four fields of each record into four column lists.
data_list = [[row[column] for row in rows] for column in range(4)]

# Map each header cell to the remaining cells of its column.
data_dict = {column[0]: column[1:] for column in data_list}

fig = px.histogram(data_dict, x='округ', y='балл', color="предмет",
                   template='presentation')
fig.show()
Ejemplo n.º 20
0
#print(df.iloc[:5, [2,3,5,10]])
#print(df.Genre.nunique())
#print(df.Genre.unique())
#print(sorted(df.Year.unique()))

# Data Visualization with Plotly (Python)
# -----------------------------------------------------------------

#fig_pie = px.pie(data_frame=df, names='Genre', values='Japan Sales')
#fig_pie = px.pie(data_frame=df, names='Genre', values='North American Sales')
#fig_pie.show()

#fig_bar = px.bar(data_frame=df, x='Genre', y='Japan Sales')
#fig_bar.show()

fig_hist = px.histogram(data_frame=df, x='Year', y='Japan Sales')
#fig_hist.show()

# Interactive Graphs with Dash (Python, R, Julia)
# -----------------------------------------------------------------

import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Output, Input

app = dash.Dash(__name__)

app.layout = html.Div([
    html.H1("Graph Analysis with Charming Data"),
    dcc.Dropdown(id='genre-choice',
                 options=[{
Ejemplo n.º 21
0
import pytest


@pytest.mark.skipif(
    not hasattr(pd.options.plotting, "backend"),
    reason="Currently installed pandas doesn't support plotting backends.",
)
@pytest.mark.parametrize(
    "pandas_fn,px_fn",
    [
        (lambda df: df.plot(), px.line),
        (
            lambda df: df.plot.scatter("A", "B"),
            lambda df: px.scatter(df, "A", "B"),
        ),
        (lambda df: df.plot.line(), px.line),
        (lambda df: df.plot.area(), px.area),
        (lambda df: df.plot.bar(), px.bar),
        (lambda df: df.plot.barh(), lambda df: px.bar(df, orientation="h")),
        (lambda df: df.plot.box(), px.box),
        (lambda df: df.plot.hist(), px.histogram),
        (lambda df: df.boxplot(), px.box),
        (lambda df: df.hist(), px.histogram),
        (lambda df: df["A"].hist(), lambda df: px.histogram(df["A"])),
    ],
)
def test_pandas_equiv(pandas_fn, px_fn):
    """Check that pandas plotting with the plotly backend matches Plotly Express.

    Each case pairs a pandas plotting call with the Plotly Express call it is
    expected to be equivalent to; both run on the same random-walk frame and
    the resulting figures must compare equal.
    """
    df = pd.DataFrame(np.random.randn(100, 4), columns=list("ABCD")).cumsum()
    # Switch the backend only for the duration of the comparison so the
    # global pandas option is restored even when the assertion fails, and
    # does not leak into tests that run afterwards.
    with pd.option_context("plotting.backend", "plotly"):
        assert pandas_fn(df) == px_fn(df)
Ejemplo n.º 22
0
import dash
import dash_bootstrap_components as dbc
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output
import pandas as pd
import plotly.express as px

# Imports from this application
from app import app

# Raw CSV location (GitHub gist) that the data frame is loaded from.
web_url = 'https://gist.githubusercontent.com/dggasque/cf034a3ce095830bd891b5af42987fb0/raw/7223d727d1aa65ad915575fe9ff46d7ee1be0f0f/mushroom_mapped.csv'

# Read the CSV at import time; this requires network access to web_url.
df = pd.read_csv(web_url)

# Histogram of the "odor" column, with one colour per value of "class".
fig1 = px.histogram(df, x="odor", color="class")

# 1 column layout
# https://dash-bootstrap-components.opensource.faculty.ai/l/components/layout
column1 = dbc.Col([
    dcc.Markdown("""
        
            ## Process

            ### About the Data
            
            The data for this project was originally donated to the UCI Machine Learning Repository in 1987. 
            Mushroom records are derived from *The Audubon Society Feild Guide to North American Mushrooms*(1981). 
            The guide identifies each species of mushroom as edible, definitely poisonous, or of unknown edibility. 
            The data set combines the latter class into the class poisonous. 
            There are 22 categorical features that can be used to predict whether a mushroom is edible or poisonous. 
Ejemplo n.º 23
0
#!/usr/bin/env python
# coding: utf-8

# In[ ]:

from jupyter_dash import jupyter_dash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output
import plotly.express as px
import pandas as pd

# Load the data set from a CSV file in the current working directory.
df_pu = pd.read_csv("df_pu.csv")

# Initial figure: "Sales_pu" summed per "City"; the layout update orders the
# x-axis categories by descending sum.
fig = px.histogram(
    df_pu, x="City", y="Sales_pu",
    histfunc="sum").update_layout(xaxis={'categoryorder': 'sum descending'})
# Column names offered as y values.
y_data = ["Sales_pu", "Profit_pu", "Discount", "Quantity"]
# Column names offered as x values.
x_data = [
    'Ship Mode', 'Segment', 'City', 'State', 'Region', 'Category',
    'Sub-Category'
]
# Aggregation function names (presumably passed as px `histfunc` - confirm
# against the callbacks, which are outside this fragment).
functions = ["sum", "min", "max", "avg"]

# NOTE(review): `dash` is used here but only `jupyter_dash` is imported in the
# visible part of this snippet - confirm `dash` is imported elsewhere.
app = dash.Dash(__name__)
server = app.server

app.layout = html.Div(
    [
        html.H1(children=" Interactive Visualization Dashboard",
                style={
Ejemplo n.º 24
0
    def calculate(self, reference_data: pd.DataFrame,
                  production_data: pd.DataFrame, column_mapping):
        """Build the per-feature table widget with confusion-coloured histograms.

        For every selected feature one "All" histogram (coloured by the target
        column) is created, followed by one histogram per label that is
        coloured by the TP / TN / FP / FN outcome for that label. The widget
        is stored in ``self.wi``; ``self.wi`` is set to ``None`` when the
        target or the prediction column is unknown.

        Args:
            reference_data: Reference data set.
            production_data: Optional second data set. When it is not
                ``None`` every plot is faceted into "Reference" and
                "Production".
            column_mapping: Optional mapping read through the keys 'target',
                'prediction', 'numerical_features' and
                'categorical_features'. When it is falsy, the columns are
                inferred from ``reference_data``.

        Side effects:
            A 'prediction_labels' column is written to every frame passed in.
            With production data (and at least one feature) a 'dataset'
            column is written to both frames. Without production data the
            'Confusion' column is written to ``reference_data`` itself.
        """
        if column_mapping:
            target_column = column_mapping.get('target')
            prediction_column = column_mapping.get('prediction')
            num_feature_names = [
                name
                for name in column_mapping.get('numerical_features') or []
                if is_numeric_dtype(reference_data[name])
            ]
            # NOTE(review): categorical features are also filtered with
            # is_numeric_dtype, so non-numeric categorical columns are
            # dropped. Kept as is - confirm whether this is intended.
            cat_feature_names = [
                name
                for name in column_mapping.get('categorical_features') or []
                if is_numeric_dtype(reference_data[name])
            ]
        else:
            columns = reference_data.columns
            date_column = 'datetime' if 'datetime' in columns else None
            target_column = 'target' if 'target' in columns else None
            prediction_column = 'prediction' if 'prediction' in columns else None

            utility_columns = {date_column, target_column, prediction_column}

            num_feature_names = list(
                set(reference_data.select_dtypes([np.number]).columns) -
                utility_columns)
            # The builtin `object` replaces the `np.object` alias, which was
            # removed in NumPy 1.24.
            cat_feature_names = list(
                set(reference_data.select_dtypes([object]).columns) -
                utility_columns)

        if prediction_column is None or target_column is None:
            self.wi = None
            return

        # prediction_column is indexed by argmax position below and iterated
        # as the list of labels, so it is expected to be a sequence of column
        # names (presumably one probability column per class - confirm).
        labels = prediction_column
        has_production = production_data is not None
        feature_names = num_feature_names + cat_feature_names

        def predicted_labels(frame):
            # Name of the prediction column holding the row-wise maximum.
            winners = np.argmax(frame[prediction_column].to_numpy(), axis=-1)
            return [prediction_column[idx] for idx in winners]

        reference_data['prediction_labels'] = predicted_labels(reference_data)
        if has_production:
            production_data['prediction_labels'] = predicted_labels(
                production_data)

        plot_data = None
        if not has_production:
            plot_data = reference_data
        elif feature_names:
            # The merged frame does not depend on the feature being plotted,
            # so it is built once instead of once per feature.
            reference_data['dataset'] = 'Reference'
            production_data['dataset'] = 'Production'
            plot_data = pd.concat([reference_data, production_data])

        def build_graph(graph_id, feature_name, color, confusion):
            # One histogram of `feature_name`, packaged as additional graph.
            plot_kwargs = {'histnorm': ''}
            category_orders = {}
            if has_production:
                plot_kwargs['facet_col'] = "dataset"
                category_orders["dataset"] = ["Reference", "Production"]
            if confusion:
                category_orders["Confusion"] = ["TP", "TN", "FP", "FN"]
            if category_orders:
                plot_kwargs['category_orders'] = category_orders

            fig = px.histogram(plot_data,
                               x=feature_name,
                               color=color,
                               **plot_kwargs)
            fig_json = json.loads(fig.to_json())
            return AdditionalGraphInfo(
                graph_id,
                {
                    "data": fig_json['data'],
                    "layout": fig_json['layout']
                },
            )

        additional_graphs_data = []
        params_data = []

        for feature_name in feature_names:
            # add data for table in params
            params_data.append({
                "details": {
                    "parts": [{
                        "title": "All",
                        "id": "All"
                    }] + [{
                        "title": str(label),
                        "id": feature_name + "_" + str(label)
                    } for label in labels],
                    "insights": []
                },
                "f1": feature_name
            })

            # overall plot, coloured by the target column
            additional_graphs_data.append(
                build_graph("All", feature_name, target_column, False))

            # one confusion plot per label
            for label in labels:
                # Uses target_column instead of a hard-coded 'target' so a
                # mapped target name works; vectorised so empty frames work.
                is_target = (plot_data[target_column] == label).to_numpy()
                is_predicted = (
                    plot_data['prediction_labels'] == label).to_numpy()
                plot_data['Confusion'] = np.select(
                    [is_target & is_predicted, is_predicted, is_target],
                    ['TP', 'FP', 'FN'],
                    default='TN')

                additional_graphs_data.append(
                    build_graph(feature_name + "_" + str(label),
                                feature_name, 'Confusion', True))

        self.wi = BaseWidgetInfo(
            title=self.title,
            type="big_table",
            details="",
            alertStats=AlertStats(),
            alerts=[],
            alertsPosition="row",
            insights=[],
            size=2,
            params={
                "rowsPerPage": min(len(feature_names), 10),
                "columns": [{
                    "title": "Feature",
                    "field": "f1"
                }],
                "data": params_data
            },
            additionalGraphs=additional_graphs_data)
Ejemplo n.º 25
0
 def histogram(self, **kwargs):
     """Return ``px.histogram`` of ``self.df``, forwarding all keyword arguments."""
     figure = px.histogram(data_frame=self.df, **kwargs)
     return figure
Ejemplo n.º 26
0
def main():
    """Render the Streamlit page for the Boston housing price prediction app.

    Loads the data via ``get_data()`` and a model via ``train_model()``,
    shows a column selection and a price histogram in the main area, collects
    the property attributes in the sidebar and, once the button is pressed,
    writes the model's prediction to the page.
    """
    # build the data frame
    data = get_data()

    # train the model
    model = train_model()

    # title
    st.title(
        "Data App - Prediciendo el Valor de Inmuebles de la Ciudad de Boston")

    # subtitle
    st.info(
        "Este es un App de Predicción de Machine Learning utilizado para exibir el problema de predicción de valores de inmuebles de la ciudad de Boston."
    )

    # inspecting the data set
    st.subheader(
        "Seleccione el conjunto de características de la base de datos")

    # columns shown by default
    defaultcols = ["RM", "PTRATIO", "LSTAT", "MEDV"]

    # let the user pick the columns through a multiselect
    cols = st.multiselect("Características",
                          data.columns.tolist(),
                          default=defaultcols)

    # show the first 10 rows of the selected columns
    st.dataframe(data[cols].head(10))

    st.subheader("Distribución de inmuebles por precio")

    # price range slider: lower bound is the minimum MEDV, upper bound 150,
    # initial selection (10, 100)
    faixa_valores = st.slider("Rango de precios", float(data.MEDV.min()), 150.,
                              (10.0, 100.0))

    # keep only the rows whose MEDV lies inside the selected range
    dados = data[data['MEDV'].between(left=faixa_valores[0],
                                      right=faixa_valores[1])]

    # plot the distribution of the filtered data
    f = px.histogram(dados,
                     x="MEDV",
                     nbins=100,
                     title="Distribución de Precios")
    f.update_xaxes(title="MEDV")
    f.update_yaxes(title="Total de Inmuebles")
    st.plotly_chart(f)

    st.sidebar.subheader("Defina los atributos del inmueble para predicción")

    # map the user input to each model attribute
    crim = st.sidebar.number_input("Tasa de Criminalidad",
                                   value=data.CRIM.mean())
    # Default is the mean of INDUS; the original used the CRIM mean here,
    # which looks like a copy-paste slip from the input above.
    indus = st.sidebar.number_input("Proporción de Hectares de Negócio",
                                    value=data.INDUS.mean())
    chas = st.sidebar.selectbox("Tiene límite con el río?", ("Si", "No"))

    # turn the yes/no choice into a binary value
    chas = 1 if chas == "Si" else 0

    nox = st.sidebar.number_input("Concentración de óxido nítrico",
                                  value=data.NOX.mean())

    rm = st.sidebar.number_input("Número de Cuartos", value=1)

    ptratio = st.sidebar.number_input("Índice de alunos para profesores",
                                      value=data.PTRATIO.mean())

    b = st.sidebar.number_input(
        "Proporción de personar de descendencia afro-americana",
        value=data.B.mean())

    lstat = st.sidebar.number_input("Porcentaje de status bajo",
                                    value=data.LSTAT.mean())

    # prediction button
    btn_predict = st.sidebar.button("Realizar Predicción")

    # credits
    st.sidebar.info("Desarrollado por Juan Minango")

    # run the prediction only when the button was pressed
    if btn_predict:
        result = model.predict(
            [[crim, indus, chas, nox, rm, ptratio, b, lstat]])
        st.subheader(
            "El valor previsto para el inmueble con las caracteristicas escojidas es:"
        )
        # the first prediction is multiplied by 10 and rounded to 2 decimals
        result = "US $ " + str(round(result[0] * 10, 2))
        st.write(result)
Ejemplo n.º 27
0
    def request_resource(self, turbine):
        """
        Answer a wind turbine's request for a vessel to fix a failure.

        The requesting turbine is not inspected: every call hands back the
        first entry of ``self.CTVs``.
        """
        vessels = self.CTVs
        return vessels[0]


# Script section: set up the wind-site simulation, run it, and plot
# per-turbine results.
print('Wind Site')
#random.seed(RANDOM_SEED)
env = simpy.Environment()
# A single preemptive resource with capacity 1000 is the only entry in CTVs.
CTVs = [simpy.PreemptiveResource(env, capacity=1000)]
# NOTE(review): this rebinds the name `resource_manager` from the callable to
# the instance it returns, so it cannot be called again after this line.
resource_manager = resource_manager(env, "rm1", CTVs)
# One turbine object per index, all sharing the same resource manager.
turbines = [
    turbine(env, 'Turbine %d' % i, resource_manager)
    for i in range(NUM_TURBINES)
]
# Run the simulation until SIM_TIME.
env.run(until=SIM_TIME)

# Collect per-turbine results into a data frame.
turbine_data = pd.DataFrame([])
# Uptime: each turbine's `power` divided by SIM_TIME, times 100.
turbine_data["Uptime"] = [(turbines[i].power / SIM_TIME) * 100
                          for i in range(NUM_TURBINES)]
turbine_data["Failures"] = [
    turbines[i].num_failures for i in range(NUM_TURBINES)
]
# Histograms of both result columns.
fig1 = px.histogram(turbine_data, x="Uptime", nbins=100)
fig1.show()
fig2 = px.histogram(turbine_data, x="Failures", nbins=50)
fig2.show()
# %matplotlib inline

# Load the data set from a CSV file in the current working directory.
df = pd.read_csv('Heart Disease.csv')

# The bare expressions below only display a result when run in a notebook
# cell; in a plain script their values are discarded.
df.head()

# Missing values per column.
df.isnull().sum()

# Row count per value of 'target'.
df.value_counts('target')

# Share of rows that belong to the most frequent 'target' value.
df.value_counts('target').iloc[0] / len(df)
"""Visualisation"""

import plotly.express as ex

# Histogram of 'target', one colour per value of 'sex'.
fig = ex.histogram(x=df['target'], color=df['sex'])
fig.show()

# Correlation of every column with 'target', sorted ascending.
df.corr()['target'].sort_values()

# Count plot of 'age', split by 'target'.
plt.figure(figsize=(20, 12))
sns.countplot(x=df['age'], hue=df['target'])
"""Train Test Split"""

# Features: every column except the target and the columns dropped here.
X = df.drop(['target', 'sex', 'age', 'trestbps', 'chol', 'fbs', 'restecg'],
            axis=1)
# Label vector.
y = df['target']

from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X,
Ejemplo n.º 29
0
                           values=plot_sentiment(choice[j]).Tweets,
                           showlegend=True), i + 1, j + 1)
        fig_3.update_layout(height=600, width=800)
        st.plotly_chart(fig_3)
# Sidebar section: choose airlines and show their tweets split by sentiment.
st.sidebar.subheader("Breakdown airline by sentiment")
choice = st.sidebar.multiselect('Pick airlines',
                                ('US Airways', 'United', 'American',
                                 'Southwest', 'Delta', 'Virgin America'),
                                key=0)
# Only draw the chart once at least one airline is selected.
if len(choice) > 0:
    # Rows whose airline is among the selected ones.
    choice_data = data[data.airline.isin(choice)]
    # Count histogram per airline, coloured and faceted by sentiment; the
    # 'airline_sentiment' axis/legend label is displayed as 'tweets'.
    fig_0 = px.histogram(choice_data,
                         x='airline',
                         y='airline_sentiment',
                         histfunc='count',
                         color='airline_sentiment',
                         facet_col='airline_sentiment',
                         labels={'airline_sentiment': 'tweets'},
                         height=600,
                         width=800)
    st.plotly_chart(fig_0)

st.sidebar.header("Word Cloud")
word_sentiment = st.sidebar.radio('Display word cloud for what sentiment?',
                                  ('positive', 'neutral', 'negative'))
if not st.sidebar.checkbox("Close", False, key='3'):
    st.subheader('Word cloud for %s sentiment' % (word_sentiment))
    df = data[data['airline_sentiment'] == word_sentiment]
    words = ' '.join(df['text'])
    processed_words = ' '.join([
        word for word in words.split()
Ejemplo n.º 30
0
import pandas as pd
import plotly.express as px
from constants import FILE

# Load the data set from the path given by the FILE constant.
# NOTE(review): `st` is used below but its import is not visible in this
# snippet - confirm streamlit is imported as `st`.
df = pd.read_csv(FILE)

st.title('Projeto prático do tutorial da Python Brasil')
st.markdown("## Nutrition Facts for McDonald's Menu")

# Distinct values of the 'Category' column, offered in the select box below.
categories = df['Category'].unique().tolist()

st.markdown("### Describe")
# Optional summary statistics for the whole data frame.
if st.checkbox('Apresentar describe símples'):
    st.write(df.describe())
# Optional summary statistics restricted to one selected category.
if st.checkbox('Apresentar describe agrupado por categoria'):
    category = st.selectbox(
        'Selecione uma categoria para mostrar um describe da categoria.',
        categories)
    st.write(df.loc[df['Category'] == category].describe())

# Optional raw views of the data: interactive frame and static table.
if st.checkbox('Mostrar dataframe'):
    st.dataframe(df)
if st.checkbox('Mostrar dataframe como tabela'):
    st.table(df)

# NOTE: this also rebinds `categories` to the list of column names.
columns = categories = df.columns.tolist()
column = st.selectbox('Selecione uma coluna para plotar o histograma.',
                      columns)

# Histogram of the selected column.
fig = px.histogram(df, x=column)
st.plotly_chart(fig)