def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame, column_mapping):
    """Build the "big_table" widget with per-feature target/prediction histograms.

    For every selected feature one table row is emitted; each row links to a
    histogram of that feature coloured by the target and/or the prediction
    column, faceted by dataset ("Reference" vs "Production"). The result is
    stored in ``self.wi``; it is ``None`` when neither a target nor a
    prediction column is available.

    :param reference_data: reference dataset; also used to detect feature dtypes.
    :param production_data: production dataset with the same columns.
    :param column_mapping: optional dict with the keys ``'target'``,
        ``'prediction'``, ``'numerical_features'`` and
        ``'categorical_features'``. When falsy, columns are inferred from
        ``reference_data``.
    :return: None. The widget is written to ``self.wi``.
    """
    if column_mapping:
        target_column = column_mapping.get('target')
        prediction_column = column_mapping.get('prediction')

        num_feature_names = column_mapping.get('numerical_features')
        if num_feature_names is None:
            num_feature_names = []
        else:
            num_feature_names = [name for name in num_feature_names if is_numeric_dtype(reference_data[name])]

        cat_feature_names = column_mapping.get('categorical_features')
        if cat_feature_names is None:
            cat_feature_names = []
        else:
            # NOTE(review): categorical features are also filtered with
            # is_numeric_dtype, exactly as before; presumably only numerically
            # encoded categories are supported here - confirm.
            cat_feature_names = [name for name in cat_feature_names if is_numeric_dtype(reference_data[name])]
    else:
        date_column = 'datetime' if 'datetime' in reference_data.columns else None
        id_column = None
        target_column = 'target' if 'target' in reference_data.columns else None
        prediction_column = 'prediction' if 'prediction' in reference_data.columns else None

        utility_columns = [date_column, id_column, target_column, prediction_column]

        num_feature_names = list(set(reference_data.select_dtypes([np.number]).columns) - set(utility_columns))
        # `np.object` was only an alias of the builtin `object`; the alias was
        # deprecated in NumPy 1.20 and removed in 1.24.
        cat_feature_names = list(set(reference_data.select_dtypes([object]).columns) - set(utility_columns))

    # (row title, id suffix, colour column) for every plot shown per feature.
    plot_specs = []
    if target_column is not None:
        plot_specs.append(("Target", "_target_values", target_column))
    if prediction_column is not None:
        plot_specs.append(("Prediction", "_prediction_values", prediction_column))

    if not plot_specs:
        self.wi = None
        return

    # Tag each frame with its origin on a copy so the caller's frames are not
    # mutated, and build the merged frame once instead of once per feature.
    merged_data = pd.concat([
        reference_data.assign(dataset='Reference'),
        production_data.assign(dataset='Production'),
    ])

    additional_graphs_data = []
    params_data = []
    for feature_name in num_feature_names + cat_feature_names:
        # add data for table in params
        params_data.append(
            {
                "details": {
                    "parts": [
                        {"title": title, "id": feature_name + suffix}
                        for title, suffix, _ in plot_specs
                    ],
                    "insights": []
                },
                "f1": feature_name
            }
        )

        for _, suffix, color_column in plot_specs:
            fig = px.histogram(merged_data, x=feature_name, color=color_column, facet_col="dataset",
                               category_orders={"dataset": ["Reference", "Production"]})
            fig_json = json.loads(fig.to_json())

            # write plot data in table as additional data
            additional_graphs_data.append(
                AdditionalGraphInfo(
                    feature_name + suffix,
                    {
                        "data": fig_json['data'],
                        "layout": fig_json['layout']
                    },
                )
            )

    self.wi = BaseWidgetInfo(
        title=self.title,
        type="big_table",
        details="",
        alertStats=AlertStats(),
        alerts=[],
        alertsPosition="row",
        insights=[],
        size=2,
        params={
            "rowsPerPage": min(len(num_feature_names) + len(cat_feature_names), 10),
            "columns": [
                {
                    "title": "Feature",
                    "field": "f1"
                }
            ],
            "data": params_data
        },
        additionalGraphs=additional_graphs_data
    )
def histogram(self, title='', bins=50, color='blue', metric=''):
    """Display a Plotly histogram of the ``metric`` column.

    The object is first sorted with ``self.sort_values(metric)``; the figure
    is shown immediately and nothing is returned.

    :param title: figure title.
    :param bins: passed to Plotly as ``nbins``.
    :param color: single colour used for the bars.
    :param metric: name of the column to sort by and to plot on the x axis.
    """
    ordered = self.sort_values(metric)
    figure = px.histogram(
        ordered,
        x=metric,
        title=title,
        nbins=bins,
        color_discrete_sequence=[color],
    )
    figure.show()
cols = st.multiselect("Atributos", df.columns.tolist(), default=default_cols)

# show the first 10 records of the selected columns
st.dataframe(df[cols].head(10))

st.subheader("Distribuição de imóveis por preço")

# let the user pick the price range
faixa_valores = st.slider(
    "Faixa de preço",
    min_value=float(df["MEDV"].min()),
    max_value=150.0,
    value=(10.0, 100.0),
)

# keep only the rows whose price lies inside the selected range
preco_min, preco_max = faixa_valores
dados = df[df["MEDV"].between(preco_min, preco_max)]

# plot the distribution of the filtered prices
f = (
    px.histogram(dados, x='MEDV', nbins=100, title='Distribuição de Preços')
    .update_xaxes(title='MEDV')
    .update_yaxes(title='Total Imóveis')
)
st.plotly_chart(f)

st.sidebar.subheader("Defina os atributos do imóvel para predição")

# map the user's input to each attribute
crim = st.sidebar.number_input("Taxa de criminalidade", value=df["CRIM"].mean())
indus = st.sidebar.number_input("Proporção de hectares de negócio", value=df["INDUS"].mean())
chas = st.sidebar.selectbox("Faz limite com o rio?", ('Sim', "Não"))

# turn the yes/no answer into a binary value
chas = int(chas == 'Sim')
def update_graph2(chosen_column):
    """Return a histogram of the first 1000 entries of ``df[chosen_column]``.

    :param chosen_column: column selector applied to the module-level ``df``;
        it is also passed to Plotly as the ``x`` argument.
    :return: the Plotly Express histogram figure.
    """
    return px.histogram(df[chosen_column][:1000], x=chosen_column)
def update_histogram(x_data, y_data, func):
    """Build an aggregated histogram of the module-level ``df_pu``.

    :param x_data: column used for the x axis.
    :param y_data: column aggregated on the y axis.
    :param func: aggregation name passed to Plotly as ``histfunc``.
    :return: the figure, with x categories ordered by descending sum.
    """
    category_order = {'categoryorder': 'sum descending'}
    figure = px.histogram(df_pu, x=x_data, y=y_data, histfunc=func)
    figure.update_layout(xaxis=category_order)
    return figure
# show the first 10 records of the dataframe
st.dataframe(data[cols].head(10))

st.subheader("Distribuição de imóveis por preço")

# let the user pick the price range
faixa_valores = st.slider("Faixa de preço", float(data.MEDV.min()), 150., (10.0, 100.0))

# keep only the rows whose price lies inside the selected range
dados = data[data['MEDV'].between(left=faixa_valores[0], right=faixa_valores[1])]

# plot the distribution of the filtered prices
f = px.histogram(dados, x="MEDV", nbins=100, title="Distribuição de Preços")
f.update_xaxes(title="MEDV")
f.update_yaxes(title="Total Imóveis")
st.plotly_chart(f)

st.sidebar.subheader("Defina os atributos do imóvel para predição")

# map the user's input to each attribute
crim = st.sidebar.number_input("Taxa de Criminalidade", value=data.CRIM.mean())
# The default must come from the INDUS column (proportion of business
# hectares); it previously reused the CRIM mean by copy-paste.
indus = st.sidebar.number_input("Proporção de Hectares de Negócio", value=data.INDUS.mean())
chas = st.sidebar.selectbox("Faz limite com o rio?", ("Sim", "Não"))

# turn the yes/no answer into a binary value
chas = 1 if chas == "Sim" else 0
def main():
    """Run the Streamlit demo: train/evaluate the chosen model, then plot the data.

    The page shows, in order: an optional preview of the raw data, the score
    and report of the model picked in the sidebar (with an optional prediction
    on user-supplied input), a map of the data returned by ``showMap()``, and
    an optional histogram picked in the sidebar.
    """
    st.title(
        "Prediction of Trip History Data using various Machine Learning Classification Algorithms- A Streamlit Demo!"
    )
    data = loadData()
    X_train, X_test, y_train, y_test, le = preprocessing(data)

    # Insert Check-Box to show the snippet of the data.
    if st.checkbox('Show Raw Data'):
        st.subheader("Showing raw data---->>>")
        st.write(data.head())

    # ML Section
    # sidebar choice -> (name used in the messages, training function,
    #                    whether user input is standardized before predicting)
    models = {
        "Decision Tree": ("Decision Tree", decisionTree, False),
        "Neural Network": ("Neural Network", neuralNet, True),
        "K-Nearest Neighbours": ("K-Nearest Neighbour", Knn_Classifier, False),
    }
    choose_model = st.sidebar.selectbox("Choose the ML Model", ["NONE", *models])

    if choose_model in models:
        display_name, train_model, needs_scaling = models[choose_model]
        score, report, model = train_model(X_train, X_test, y_train, y_test)
        st.text(f"Accuracy of {display_name} model is: ")
        st.write(score, "%")
        st.text(f"Report of {display_name} model is: ")
        st.write(report)

        if st.checkbox(
                "Want to predict on your own Input? It is recommended to have a look at dataset to enter values in below tabs than just typing in random values"
        ):
            # Prediction on free-form input stays best-effort, but failures are
            # now reported instead of being silently swallowed by a bare
            # `except: pass` (which also caught non-Exception control signals).
            try:
                user_prediction_data = accept_user_data()
                if needs_scaling:
                    scaler = StandardScaler()
                    scaler.fit(X_train)
                    user_prediction_data = scaler.transform(user_prediction_data)
                pred = model.predict(user_prediction_data)
                st.write(
                    "The Predicted Class is: ",
                    le.inverse_transform(pred)
                )  # Inverse transform to get the original dependent value.
            except Exception as exc:
                st.warning(f"Could not compute a prediction for this input: {exc}")

    # Visualization Section
    plotData = showMap()
    st.subheader(
        "Bike Travel History data plotted-first few locations located near Washington DC"
    )
    st.map(plotData, zoom=14)

    # sidebar choice -> column whose value counts are plotted
    viz_columns = {
        "Total number of vehicles from various Starting Points": 'Start station',
        "Total number of vehicles from various End Points": 'End station',
        "Count of each Member Type": 'Member type',
    }
    choose_viz = st.sidebar.selectbox("Choose the Visualization", ["NONE", *viz_columns])

    column = viz_columns.get(choose_viz)
    if column is not None:
        fig = px.histogram(data[column], x=column)
        st.plotly_chart(fig)
st.markdown("# Rejected CA License Plates")

df = fetch_data()

# Three randomly sampled plates, each rendered as its own markdown card.
st.markdown("## Random")
for row in df.sample(3).to_dict(orient='records'):
    verdict = "REJECTED" if row["status"] == "N" else "APPROVED"
    st.markdown(f'''
    ## {row["plate"]}
    * Customer Explanation: {row["customer_meaning"]}
    * Flag Reason: {row["reason"]}
    * Reviewer Comments: {row["reviewer_comments"]}
    * __{verdict}__
    ''')

# The button has no handler of its own; its return value is not used.
st.button("Try Another!")

st.markdown("## Summary Stats")
st.write(df.describe())

# Histogram restricted to rows whose status is "Y" or "N".
st.markdown("## Approvals")
decided = df.query('status in ["Y", "N"]')
approvals_fig = px.histogram(decided, x='status', title='Approved?')
st.plotly_chart(approvals_fig)

st.markdown("## Common Flags")
flags_fig = px.histogram(df, x='reason', title='Flagging')
st.plotly_chart(flags_fig)

st.markdown("## Data")
st.dataframe(df, width=1000, height=1200)
# visit http://127.0.0.1:8050/ in your web browser. import dash import dash_core_components as dcc import dash_html_components as html import plotly.express as px import pandas as pd external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css'] app = dash.Dash(__name__, external_stylesheets=external_stylesheets) colors = {'background': '#111111', 'text': '#7FDBFF'} df = pd.read_csv("data/googleplaystore.csv") fig = px.histogram(df, x="Rating", range_x=[0.8, 5.2]) fig.update_layout(plot_bgcolor=colors['background'], paper_bgcolor=colors['background'], font_color=colors['text']) app.layout = html.Div(style={'backgroundColor': colors['background']}, children=[ html.H1(children='Dashboard aplicaciones Android', style={ 'textAlign': 'center', 'color': colors['text'] }), html.Div(children=''' Primera app de aprendizaje con Dash ''', style={
def cluster(key, maxClusters):
    """Fit Gaussian mixtures with 1..maxClusters components and export plots.

    The models are fitted on the ``featureDict[key]`` columns of the
    module-level ``normData``. For every component count the function records
    BIC and score, stores the predicted cluster per ``subjid``, and writes the
    per-cluster means (CSV and HTML line plot) and a grouped ``sf94``
    histogram (HTML) into the ``GMM_clustering_<key>`` folder.

    :param key: key into ``featureDict``; also used in column and folder names.
    :param maxClusters: largest number of mixture components to fit.
    :return: tuple ``(summary statistics frame, predictions frame)``.
    """
    fit_stats = pd.DataFrame(columns=['BIC', 'Likelihood'])
    labels = pd.DataFrame(normData['subjid'])
    feature_cols = featureDict[key]

    for n_comp in range(1, maxClusters + 1):
        features = normData[feature_cols]

        # Fit the mixture model for this number of components
        model = GaussianMixture(n_components=n_comp,
                                covariance_type='diag',
                                random_state=0)
        model = model.fit(features)

        # BIC and score of the fitted model
        fit_stats.loc[n_comp, :] = [model.bic(features), model.score(features)]

        # One prediction column per component count, e.g. "<key>3"
        label_col = key + str(n_comp)
        labels[label_col] = model.predict(features)

        # Means of the allVarsNeth variables within each predicted cluster
        norm_with_labels = pd.merge(normData, labels, on='subjid')
        selected = norm_with_labels[allVarsNeth.union([label_col])]
        cluster_means = selected.groupby([label_col]).mean().T

        # Line plot of the means
        means_fig = px.line(cluster_means,
                            x=cluster_means.index,
                            y=cluster_means.columns,
                            template="simple_white")

        out_dir = root + 'stevenkerr/Git/wp5-clustering/Code/clinical/GMM_clustering_' + key
        cluster_means.to_csv(f"{out_dir}/means{n_comp}.csv")
        pio.write_html(means_fig,
                       file=f"{out_dir}/means{n_comp}.html",
                       auto_open=False)

        # Histogram of sf94 by cluster
        raw_with_labels = pd.merge(data, labels, on='subjid')
        sf94_fig = px.histogram(raw_with_labels,
                                x='sf94',
                                color=label_col,
                                nbins=10,
                                barmode='group',
                                histnorm='probability density')
        pio.write_html(sf94_fig,
                       file=f"{out_dir}/sf94{n_comp}.html",
                       auto_open=False)

    return (fit_stats, labels)
if part == 'Introduction': st.title('Flight Delay Analysis') st.markdown(''' ## Structure of Notebook 1. Basic Data Introduction 2. Analyze columns with a lot of null values 3. Flight Distance Analysis 4. Departure Delay Temporal Analysis 5. Analysis of Categorical Variables of Interest 6. Final conclusions ''') st.title('Basic Introduction') fig = px.histogram(data_frame=df, x='arr_delay') st.plotly_chart(fig, use_container_width=True) #%% fig1 = px.box(data_frame=df, x='arr_delay') st.plotly_chart(fig1, use_container_width=True) # %% st.markdown(''' ## Initial Data Introduction - Inital look at data tells us that flight delays are very skewed. - Majority of flights are expected to be early, or on time as the median fligth delay is `-6` - Although most flights make good time. Their seems to be a tendancy for extreme outliers. ''') #%% elif part == 'Analyze columns with a lot of null values':
def variables():
    """
    Summarize every column of the CSV, quantitatively or qualitatively,
    and build the figures shown on the page.

    :return: the rendered ``variables.html`` template
    """
    # Transparent plot/paper background shared by all three figures
    transparent_bg = {
        'plot_bgcolor': 'rgba(0, 0, 0, 0)',
        'paper_bgcolor': 'rgba(0, 0, 0, 0)',
    }

    # Adjust the path for your machine
    df = pd.read_csv(
        "ina-barometre-jt-tv-donnees-mensuelles-2005-2018-nombre-de-sujets.csv",
        encoding='1252',
        delimiter=';')

    # Parse the date column
    df["MOIS"] = pd.to_datetime(df["MOIS"], format="%d/%m/%Y")

    ################################################################
    # Year variable: the new columns are added to df itself, and the
    # positional indices below rely on that.
    df["ANNEE"] = pd.DatetimeIndex(df["MOIS"]).year
    by_year = df.iloc[:, [8, 9]].groupby(["ANNEE"])
    date_toto = by_year.sum()
    date_mean = by_year.mean().round(2)
    date_std = by_year.std().round(2)

    df["MONTH"] = pd.DatetimeIndex(df["MOIS"]).month
    # Computed but not used further in this function
    month_toto = df.iloc[:, [8, 10]].groupby(["MONTH"]).sum()

    ################################################################
    # Theme variable
    thematique_grouped = df.groupby("THEMATIQUES").sum()
    grouped_total = thematique_grouped.iloc[:, 6]
    effectif_thematique = grouped_total.sum()
    percent_tot = ((grouped_total / effectif_thematique) * 100).round(2)

    ################################################################
    # Channel variables (columns 2 to 7)
    channels = df.iloc[:, 2:8]
    mean_tot_chaine = channels.mean().round(2)
    std_tot_chaine = channels.std().round(2)
    sum_tot_chaine = channels.sum()

    ################################################################
    # Year figure
    fig_date_toto = px.line(date_toto,
                            x=date_toto.index,
                            y=date_toto["Totaux"],
                            labels={
                                "ANNEE": "",
                                "Totaux": ""
                            })
    fig_date_toto.update_traces(mode='markers+lines')
    fig_date_toto.update_layout(transparent_bg)
    figure_date_toto = to_json(fig_date_toto)

    ################################################################
    # Theme figure
    fig = px.pie(grouped_total, names=grouped_total.index, values="Totaux")
    fig.update_layout(transparent_bg)
    fig.update_layout(legend=dict(
        yanchor="top",
        y=0.99,
        xanchor="left",
        x=0
    ))
    figure_thematique = to_json(fig)

    ################################################################
    # Channel figure
    fig_tot_chaine = px.histogram(std_tot_chaine,
                                  x=std_tot_chaine.index,
                                  y=sum_tot_chaine,
                                  color=std_tot_chaine.index,
                                  labels={
                                      "index": "",
                                      "count": "",
                                      "sum of y": "",
                                      "y": ""
                                  })
    fig_tot_chaine.update_layout(transparent_bg)
    figure_chaines = to_json(fig_tot_chaine)

    template_context = dict(
        date_mean=date_mean,
        date_std=date_std,
        grouped_total=grouped_total,
        percent_tot=percent_tot,
        mean_tot_chaine=mean_tot_chaine,
        std_tot_chaine=std_tot_chaine,
        figure_date_toto=figure_date_toto,
        figure_thematique=figure_thematique,
        figure_chaines=figure_chaines,
    )
    return render_template("variables.html", **template_context)
df = pd.read_csv(
    'https://raw.githubusercontent.com/Coding-with-Adam/Dash-by-Plotly/master/Callbacks/Client-side-callback/opsales.csv'
)

external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']
app = dash.Dash(__name__, external_stylesheets=external_stylesheets)

# Figures are built up front so the layout below only lists components.
shipping_pie = px.pie(df, names="Shipping Mode", values="Sales")
shipping_pie.update_traces(textinfo='label+percent', showlegend=False)

status_hist = px.histogram(df, x="Order Status", y="Sales")

segment_pie = px.pie(df, names="Customer Segment", values="Sales")
segment_pie.update_traces(textinfo='label+percent', showlegend=False)

app.layout = html.Div([
    html.Button('Print Graphs', id='printing'),
    html.Div(id='hidden-content'),
    html.H1('Analysis of Store Sales', style={'textAlign': 'center'}),
    dcc.Graph(id='one', figure=shipping_pie),
    dcc.Graph(id='two', figure=status_hist),
    dcc.Graph(id='thr', figure=segment_pie),
])

# Browser-side callback: once the button has been clicked, open the print
# dialog; the returned empty string is written to the 'hidden-content' div.
app.clientside_callback(
    """
    function(clicks) {
        if (clicks > 0) {
            window.print()
        }
        return ""
    }
    """,
    Output('hidden-content', 'children'),
    Input('printing', 'n_clicks'),
)
def make_grouped_plot(
    self,
    ensembles: list,
    prop: str,
    selector_values: List[Any],
    statistic: str = "Avg",
    plot_type: str = "histogram",
) -> dict:
    """Build a faceted plot of ``statistic`` for one property as a figure dict.

    The data in ``self.dataframe`` is restricted to rows whose ``PROPERTY``
    equals ``prop``, filtered with ``self.filter_dataframe`` when
    ``selector_values`` is given, and restricted to the requested ensembles.

    :param ensembles: ensemble names to keep (matched against ``ENSEMBLE``).
    :param prop: value of the ``PROPERTY`` column to plot.
    :param selector_values: one sequence of selected values per selector, or
        ``None`` to skip selector filtering.
    :param statistic: column plotted on the value axis.
    :param plot_type: ``"histogram"``, ``"bar"``, ``"scatter"`` or
        ``"scatter_ensemble"``.
    :return: a figure dict. Every path returns a plain dict, never a
        ``go.Figure``, hence the ``dict`` annotation. When more than 50 facet
        combinations are selected (and the plot is not ``"scatter_ensemble"``)
        only a layout with an explanatory title is returned.
    :raises ValueError: if ``plot_type`` is not one of the supported values.
    """
    # Number of facet combinations = product of the selection sizes. A None
    # selection counts as a single combination, matching the `is not None`
    # guard used for filtering below.
    sel_length = 1
    for selector in selector_values or []:
        sel_length *= len(selector)

    if sel_length > 50 and plot_type != "scatter_ensemble":
        return {
            "layout": {
                "title": "Reduce number of filter selections to display graph",
            }
        }

    # Fail with a clear message instead of an UnboundLocalError on `fig`.
    if plot_type not in ("histogram", "bar", "scatter", "scatter_ensemble"):
        raise ValueError(f"Unsupported plot_type: {plot_type!r}")

    df = self.dataframe.copy()
    df = df[df["PROPERTY"] == prop]
    if selector_values is not None:
        df = self.filter_dataframe(df, self.selectors, selector_values)
    df = df[df["ENSEMBLE"].isin(ensembles)]

    if plot_type == "histogram":
        fig = px.histogram(
            df,
            x=statistic,
            nbins=20,
            facet_col="label",
            facet_col_wrap=5,
            color="ENSEMBLE",
            barmode="stack",
            color_discrete_sequence=self.colorway,
        )
    elif plot_type == "bar":
        fig = px.bar(
            df,
            y=statistic,
            x="REAL",
            facet_col="label",
            facet_col_wrap=5,
            color="ENSEMBLE",
            barmode="group",
            color_discrete_sequence=self.colorway,
        )
    elif plot_type == "scatter":
        fig = px.scatter(
            df,
            y=statistic,
            x="REAL",
            facet_col="label",
            facet_col_wrap=5,
            color="ENSEMBLE",
            color_discrete_sequence=self.colorway,
        )
    else:  # "scatter_ensemble": facet by ensemble, colour by label
        fig = px.scatter(
            df,
            y=statistic,
            x="REAL",
            facet_col="ENSEMBLE",
            facet_col_wrap=5,
            color="label",
            color_discrete_sequence=self.colorway,
        )

    fig = fig.to_dict()
    fig["layout"] = self.theme.create_themed_layout(fig["layout"])
    return fig
def plot_graphs(request): print("plot_graphs function") global ppd fig = None fig_error = False blank_choice = (None, '---------') features_name = [(i, i) for i in ppd.getFeatureName()] category_features_name = [(i, i) for i in ppd.get_category_list()] numeric_features_name = [(i, i) for i in ppd.get_numeric_features_name()] features_name.append(blank_choice) category_features_name.append(blank_choice) numeric_features_name.append(blank_choice) if request.method == 'POST': if 'scatter_btn' in request.POST: print("scatter form") print(request.POST) scatter = Scatter_form(request.POST) scatter.fields['x'].choices = features_name scatter.fields['y'].choices = features_name scatter.fields['facet_row'].choices = category_features_name scatter.fields['facet_col'].choices = category_features_name scatter.fields['color'].choices = category_features_name scatter.fields['size'].choices = numeric_features_name print(scatter.errors) if scatter.is_valid(): print("form valid") x = scatter.cleaned_data['x'] y = scatter.cleaned_data['y'] facet_row = scatter.cleaned_data['facet_row'] facet_col = scatter.cleaned_data['facet_col'] facet_col_wrap = scatter.cleaned_data['facet_col_wrap'] color = scatter.cleaned_data['color'] size = scatter.cleaned_data['size'] data_feature_list = list() data_feature_list.append(x) data_feature_list.append(y) if len(size) > 0: data_feature_list.append(size) if len(color) > 0: data_feature_list.append(color) if len(facet_row) > 0: data_feature_list.append(facet_row) if len(facet_col) > 0: data_feature_list.append(facet_col) data = pd.DataFrame(ppd.get_features_data(data_feature_list)) try: fig = px.scatter( data_frame=data, x=x, y=y, facet_row=None if len(facet_row) == 0 else facet_row, facet_col=None if len(facet_col) == 0 else facet_col, facet_col_wrap=facet_col_wrap, title=scatter.cleaned_data['title'], color=None if len(color) == 0 else color, size=None if len(size) == 0 else size, log_x=scatter.cleaned_data['log_x'], 
log_y=scatter.cleaned_data['log_y'], render_mode=scatter.cleaned_data['render_mode'], height=800) print("fig create success") except: print("fig create error") fig_error = True fig = None if 'scatter_3d_btn' in request.POST: scatter_3d = Scatter_3d_form(request.POST) scatter_3d.fields['x'].choices = features_name scatter_3d.fields['y'].choices = features_name scatter_3d.fields['z'].choices = features_name scatter_3d.fields['color'].choices = category_features_name scatter_3d.fields['size'].choices = numeric_features_name if scatter_3d.is_valid(): x = scatter_3d.cleaned_data['x'] y = scatter_3d.cleaned_data['y'] z = scatter_3d.cleaned_data['z'] color = scatter_3d.cleaned_data['color'] size = scatter_3d.cleaned_data['size'] data_feature_list = list() data_feature_list.append(x) data_feature_list.append(y) data_feature_list.append(z) if len(size) > 0: data_feature_list.append(size) if len(color) > 0: data_feature_list.append(color) data = pd.DataFrame(ppd.get_features_data(data_feature_list)) try: fig = px.scatter_3d( data_frame=data, x=x, y=y, z=z, title=scatter_3d.cleaned_data['title'], color=None if len(color) == 0 else color, size=None if len(size) == 0 else size, log_x=scatter_3d.cleaned_data['log_x'], log_y=scatter_3d.cleaned_data['log_y'], log_z=scatter_3d.cleaned_data['log_z'], height=800) print("fig create success") except: print("fig create error") fig_error = True fig = None if 'line_btn' in request.POST: print("Line Plot") line = Line_form(request.POST) line.fields['x'].choices = features_name line.fields['y'].choices = features_name line.fields['facet_row'].choices = category_features_name line.fields['facet_col'].choices = category_features_name line.fields['color'].choices = category_features_name if line.is_valid(): print("line is valid") x = line.cleaned_data['x'] y = line.cleaned_data['y'] facet_row = line.cleaned_data['facet_row'] facet_col = line.cleaned_data['facet_col'] facet_col_wrap = line.cleaned_data['facet_col_wrap'] color = 
line.cleaned_data['color'] data_feature_list = list() data_feature_list.append(x) data_feature_list.append(y) if len(color) > 0: data_feature_list.append(color) if len(facet_row) > 0: data_feature_list.append(facet_row) if len(facet_col) > 0: data_feature_list.append(facet_col) data = pd.DataFrame(ppd.get_features_data(data_feature_list)) try: fig = px.line( data_frame=data, x=x, y=y, facet_row=None if len(facet_row) == 0 else facet_row, facet_col=None if len(facet_col) == 0 else facet_col, facet_col_wrap=facet_col_wrap, title=line.cleaned_data['title'], color=None if len(color) == 0 else color, height=800) print("fig create success") except: print("fig create error") fig_error = True fig = None if 'bar_btn' in request.POST: print("Bar Plot") bar = Bar_form(request.POST) bar.fields['x'].choices = features_name bar.fields['y'].choices = features_name bar.fields['facet_row'].choices = category_features_name bar.fields['facet_col'].choices = category_features_name bar.fields['color'].choices = category_features_name if bar.is_valid(): print("Bar is valid") x = bar.cleaned_data['x'] y = bar.cleaned_data['y'] facet_row = bar.cleaned_data['facet_row'] facet_col = bar.cleaned_data['facet_col'] facet_col_wrap = bar.cleaned_data['facet_col_wrap'] color = bar.cleaned_data['color'] title = bar.cleaned_data['title'] orientation = bar.cleaned_data['orientation'] bar_mode = bar.cleaned_data['bar_mode'] data_feature_list = list() data_feature_list.append(x) data_feature_list.append(y) if len(color) > 0: data_feature_list.append(color) if len(facet_row) > 0: data_feature_list.append(facet_row) if len(facet_col) > 0: data_feature_list.append(facet_col) data = pd.DataFrame(ppd.get_features_data(data_feature_list)) try: fig = px.bar( data_frame=data, x=x, y=y, facet_row=None if len(facet_row) == 0 else facet_row, facet_col=None if len(facet_col) == 0 else facet_col, facet_col_wrap=facet_col_wrap, title=title, color=None if len(color) == 0 else color, orientation=orientation, 
barmode=bar_mode, height=800) print("fig create success") except: print("fig create error") fig_error = True fig = None if 'pie_btn' in request.POST: print("Pie Plot") pie = Pie_form(request.POST) pie.fields['values'].choices = features_name pie.fields['names'].choices = category_features_name pie.fields['color'].choices = category_features_name if pie.is_valid(): print("Pie is valid") values = pie.cleaned_data['values'] names = pie.cleaned_data['names'] color = pie.cleaned_data['color'] title = pie.cleaned_data['title'] data_feature_list = list() data_feature_list.append(values) data_feature_list.append(names) if len(color) > 0: data_feature_list.append(color) data = pd.DataFrame(ppd.get_features_data(data_feature_list)) try: fig = px.pie(data_frame=data, values=values, names=names, color=None if len(color) == 0 else color, title=title, height=800) print("fig create success") except: print("fig create error") fig_error = True fig = None if 'histogram_btn' in request.POST: print("Plot Histogram") histogram = Histogram_form(request.POST) histogram.fields['x'].choices = features_name histogram.fields['y'].choices = features_name histogram.fields['facet_row'].choices = category_features_name histogram.fields['facet_col'].choices = category_features_name histogram.fields['color'].choices = category_features_name print(histogram.errors) if histogram.is_valid(): print("Histogram is valid") x = histogram.cleaned_data['x'] y = histogram.cleaned_data['y'] facet_row = histogram.cleaned_data['facet_row'] facet_col = histogram.cleaned_data['facet_col'] facet_col_wrap = histogram.cleaned_data['facet_col_wrap'] color = histogram.cleaned_data['color'] title = histogram.cleaned_data['title'] orientation = histogram.cleaned_data['orientation'] bar_mode = histogram.cleaned_data['bar_mode'] marginal = histogram.cleaned_data['marginal'] bar_norm = histogram.cleaned_data['bar_norm'] hist_norm = histogram.cleaned_data['hist_norm'] hist_func = histogram.cleaned_data['hist_func'] log_x = 
histogram.cleaned_data['log_x'] log_y = histogram.cleaned_data['log_y'] cumulative = histogram.cleaned_data['cumulative'] data_feature_list = list() data_feature_list.append(x) data_feature_list.append(y) if len(color) > 0: data_feature_list.append(color) if len(facet_row) > 0: data_feature_list.append(facet_row) if len(facet_col) > 0: data_feature_list.append(facet_col) data = pd.DataFrame(ppd.get_features_data(data_feature_list)) try: fig = px.histogram( data_frame=data, x=x, y=y, facet_row=None if len(facet_row) == 0 else facet_row, facet_col=None if len(facet_col) == 0 else facet_col, facet_col_wrap=facet_col_wrap, title=title, color=None if len(color) == 0 else color, orientation=orientation, barmode=bar_mode, marginal=marginal, barnorm=bar_norm, histnorm=hist_norm, histfunc=hist_func, log_x=log_x, log_y=log_y, cumulative=cumulative, height=800) print("Fig create success") except: print("fig create error") fig_error = True fig = None if 'scatter_matrix_btn' in request.POST: print("Scatter Matrix Plot") scatter_matrix = Scatter_matrix_form(request.POST) scatter_matrix.fields['feature_1'].choices = numeric_features_name scatter_matrix.fields['feature_2'].choices = numeric_features_name scatter_matrix.fields['feature_3'].choices = numeric_features_name scatter_matrix.fields['feature_4'].choices = numeric_features_name scatter_matrix.fields['color'].choices = category_features_name scatter_matrix.fields['size'].choices = numeric_features_name scatter_matrix.fields['symbol'].choices = category_features_name if scatter_matrix.is_valid(): print("Scatter Matrix is valid") feature_1 = scatter_matrix.cleaned_data['feature_1'] feature_2 = scatter_matrix.cleaned_data['feature_2'] feature_3 = scatter_matrix.cleaned_data['feature_3'] feature_4 = scatter_matrix.cleaned_data['feature_4'] color = scatter_matrix.cleaned_data['color'] symbol = scatter_matrix.cleaned_data['symbol'] size = scatter_matrix.cleaned_data['size'] title = scatter_matrix.cleaned_data['title'] 
data_feature_list = list() data_feature_list.append(feature_1) data_feature_list.append(feature_2) data_feature_list.append(feature_3) data_feature_list.append(feature_4) if len(size) > 0: data_feature_list.append(size) if len(color) > 0: data_feature_list.append(color) data = pd.DataFrame(ppd.get_features_data(data_feature_list)) try: fig = px.scatter_matrix( data_frame=data, dimensions=[ feature_1, feature_2, feature_3, feature_4 ], color=None if len(color) == 0 else color, symbol=None if len(symbol) == 0 else symbol, size=None if len(size) == 0 else size, title=title) print("fig create success") except: print("fig create error") fig_error = True fig = None if 'box_btn' in request.POST: box = Box_form(request.POST) box.fields['x'].choices = features_name box.fields['y'].choices = features_name box.fields['facet_row'].choices = category_features_name box.fields['facet_col'].choices = category_features_name box.fields['color'].choices = category_features_name print(box.errors) if box.is_valid(): x = box.cleaned_data['x'] y = box.cleaned_data['y'] facet_row = box.cleaned_data['facet_row'] facet_col = box.cleaned_data['facet_col'] color = box.cleaned_data['color'] facet_col_wrap = box.cleaned_data['facet_col_wrap'] title = box.cleaned_data['title'] orientation = box.cleaned_data['orientation'] log_x = box.cleaned_data['log_x'] log_y = box.cleaned_data['log_y'] box_mode = box.cleaned_data['box_mode'] points = box.cleaned_data['points'] notched = box.cleaned_data['notched'] data_feature_list = list() if len(x) > 0: data_feature_list.append(x) if len(y) > 0: data_feature_list.append(y) if len(color) > 0: data_feature_list.append(color) if len(facet_row) > 0: data_feature_list.append(facet_row) if len(facet_col) > 0: data_feature_list.append(facet_col) data = pd.DataFrame(ppd.get_features_data(data_feature_list)) try: fig = px.box( data_frame=data, x=None if len(x) == 0 else x, y=None if len(y) == 0 else y, facet_row=None if len(facet_row) == 0 else facet_row, 
facet_col=None if len(facet_col) == 0 else facet_col, facet_col_wrap=facet_col_wrap, title=title, orientation=orientation, log_x=log_x, log_y=log_y, boxmode=box_mode, points=points, notched=notched) print("fig create success") except: print("fig create error") fig_error = True fig = None if 'violin_btn' in request.POST: violin = Violin_form(request.POST) violin.fields['x'].choices = features_name violin.fields['y'].choices = features_name violin.fields['facet_row'].choices = category_features_name violin.fields['facet_col'].choices = category_features_name violin.fields['color'].choices = category_features_name print(violin.errors) if violin.is_valid(): x = violin.cleaned_data['x'] y = violin.cleaned_data['y'] facet_row = violin.cleaned_data['facet_row'] facet_col = violin.cleaned_data['facet_col'] color = violin.cleaned_data['color'] facet_col_wrap = violin.cleaned_data['facet_col_wrap'] title = violin.cleaned_data['title'] orientation = violin.cleaned_data['orientation'] log_x = violin.cleaned_data['log_x'] log_y = violin.cleaned_data['log_y'] violin_mode = violin.cleaned_data['violin_mode'] points = violin.cleaned_data['points'] box = violin.cleaned_data['box'] data_feature_list = list() if len(x) > 0: data_feature_list.append(x) if len(y) > 0: data_feature_list.append(y) if len(color) > 0: data_feature_list.append(color) if len(facet_row) > 0: data_feature_list.append(facet_row) if len(facet_col) > 0: data_feature_list.append(facet_col) data = pd.DataFrame(ppd.get_features_data(data_feature_list)) try: fig = px.violin( data_frame=data, x=None if len(x) == 0 else x, y=None if len(y) == 0 else y, facet_row=None if len(facet_row) == 0 else facet_row, facet_col=None if len(facet_col) == 0 else facet_col, facet_col_wrap=facet_col_wrap, title=title, orientation=orientation, log_x=log_x, log_y=log_y, violinmode=violin_mode, points=points, box=box) print("fig create success") except: print("fig create error") fig_error = True fig = None if 'heat_map_btn' in 
request.POST: print("Heat Map") heat_map_data = pd.DataFrame(ppd.get_corr_matrix()) print(heat_map_data.columns) try: fig = px.imshow(heat_map_data.astype(float), x=heat_map_data.columns, y=heat_map_data.index, zmax=1, zmin=-1, height=800) except: print("fig create error") fig_error = True fig = None scatter = Scatter_form() scatter.fields['x'].choices = features_name scatter.fields['y'].choices = features_name scatter.fields['facet_row'].choices = category_features_name scatter.fields['facet_col'].choices = category_features_name scatter.fields['color'].choices = category_features_name scatter.fields['size'].choices = numeric_features_name scatter_3d = Scatter_3d_form() scatter_3d.fields['x'].choices = features_name scatter_3d.fields['y'].choices = features_name scatter_3d.fields['z'].choices = features_name scatter_3d.fields['color'].choices = category_features_name scatter_3d.fields['size'].choices = numeric_features_name line = Line_form() line.fields['x'].choices = features_name line.fields['y'].choices = features_name line.fields['facet_row'].choices = category_features_name line.fields['facet_col'].choices = category_features_name line.fields['color'].choices = category_features_name bar = Bar_form() bar.fields['x'].choices = features_name bar.fields['y'].choices = features_name bar.fields['facet_row'].choices = category_features_name bar.fields['facet_col'].choices = category_features_name bar.fields['color'].choices = category_features_name pie = Pie_form() pie.fields['values'].choices = features_name pie.fields['names'].choices = category_features_name pie.fields['color'].choices = category_features_name histogram = Histogram_form() histogram.fields['x'].choices = features_name histogram.fields['y'].choices = features_name histogram.fields['facet_row'].choices = category_features_name histogram.fields['facet_col'].choices = category_features_name histogram.fields['color'].choices = category_features_name scatter_matrix = Scatter_matrix_form() 
scatter_matrix.fields['feature_1'].choices = numeric_features_name scatter_matrix.fields['feature_2'].choices = numeric_features_name scatter_matrix.fields['feature_3'].choices = numeric_features_name scatter_matrix.fields['feature_4'].choices = numeric_features_name scatter_matrix.fields['color'].choices = category_features_name scatter_matrix.fields['size'].choices = numeric_features_name scatter_matrix.fields['symbol'].choices = category_features_name box = Box_form() box.fields['x'].choices = features_name box.fields['y'].choices = features_name box.fields['facet_row'].choices = category_features_name box.fields['facet_col'].choices = category_features_name box.fields['color'].choices = category_features_name violin = Violin_form() violin.fields['x'].choices = features_name violin.fields['y'].choices = features_name violin.fields['facet_row'].choices = category_features_name violin.fields['facet_col'].choices = category_features_name violin.fields['color'].choices = category_features_name context = { 'fig': None, 'scatter': scatter, 'line': line, 'scatter_3d': scatter_3d, 'bar': bar, 'pie': pie, 'histogram': histogram, 'scatter_matrix': scatter_matrix, 'box': box, 'violin': violin } if fig is not None: context['fig'] = pio.to_html(fig=fig, full_html=False, include_plotlyjs=False) elif fig_error is True: context[ 'fig'] = "Plot Graph Error When Setting Parameters. Please Try Again!" else: context['fig'] = None return render(request, 'data_cleaning_app/plot_graphs.html', context=context)
def fun_monit(score_pred, score_mod):
    """Serve a two-section Dash page with histograms of the ``score_1`` column.

    The first section plots ``score_mod`` (model scores) and the second one
    plots ``score_pred`` (prediction scores). The call starts the Dash
    development server on 0.0.0.0:8050 with debug mode enabled.
    """
    palette = {'background': '#111111', 'text': '#7FDBFF'}
    dashboard = dash.Dash(
        __name__,
        external_stylesheets=['https://codepen.io/chriddyp/pen/bWLwgP.css'],
    )

    def themed_histogram(scores):
        # Both figures share the same dark theme.
        figure = px.histogram(scores, x='score_1', barmode="overlay")
        figure.update_layout(
            plot_bgcolor=palette['background'],
            paper_bgcolor=palette['background'],
            font_color=palette['text'],
        )
        return figure

    def section(heading, graph_id, figure):
        # One "row" of the page: heading, caption and graph.
        return html.Div([
            html.H1(children=heading),
            html.Div(children=''' Dash: A web application framework for Python. '''),
            dcc.Graph(id=graph_id, figure=figure),
        ])

    model_figure = themed_histogram(score_mod)
    prediction_figure = themed_histogram(score_pred)

    dashboard.layout = html.Div(children=[
        section('Histograma de Scores Modelo', 'graph1', model_figure),
        section('Histograma de Scores Predicciones', 'graph2', prediction_figure),
    ])
    dashboard.run_server(host='0.0.0.0', port=8050, debug=True)
fig.show() # In[23]: outliers = np.where(clusters == -1) df_X_db = df_X.drop(list(outliers[0])) df_Y_db = df_Y.drop(list(outliers[0])) df_dbScan = result = pd.concat([df_X_db, df_Y_db], axis=1, sort=False) df_dbScan.to_csv(r'Filtered_DBSCAN.csv', index=False, header=True) print(df_dbScan.head()) # In[24]: sns.countplot(x='sex_b', data=df_dbScan) fig = px.histogram(df_dbScan, x="sex_b", color="sex_b") fig.update_layout(barmode='group') fig.show() # In[25]: X_train, X_test, y_train, y_test = train_test_split(df_X_db, df_Y_db, test_size=.33, random_state=123) clf = LazyClassifier(verbose=0, ignore_warnings=True, custom_metric=None) models, predictions = clf.fit(X_train, X_test, y_train, y_test) models # ## Isolation Forests
className="pretty_container three columns", style={ "width": "inherit", }), ], className="row container-display", style={ # "margin-left":"-60px", "width": "100%" }), html.Div( [ html.Div([ dcc.Graph(figure=px.histogram( df, x='Subjectivity', title='Histogram on Subjectivity of Sentiments', nbins=20, )) ], className="pretty_container three columns", style={ "width": "inherit", }), html.Div([ dcc.Graph(figure=px.histogram( df, x='Sentiments', title='Histogram of frequency distribution Sentiments', )) ], className="pretty_container three columns",
import csv

import plotly.express as px

# Read the ';'-separated file, keeping the first four fields of every row
# (a row with fewer than four fields raises IndexError).
with open('задание 14.csv', newline='') as csvfile:
    records = [[row[position] for position in range(4)]
               for row in csv.reader(csvfile, delimiter=';')]

# The first record holds the column names; the remaining records hold values.
header = records[0]
body = records[1:]
data_dict = {name: [record[position] for record in body]
             for position, name in enumerate(header)}

fig = px.histogram(data_dict,
                   x='округ',
                   y='балл',
                   color="предмет",
                   template='presentation')
fig.show()
#print(df.iloc[:5, [2,3,5,10]]) #print(df.Genre.nunique()) #print(df.Genre.unique()) #print(sorted(df.Year.unique())) # Data Visualization with Plotly (Python) # ----------------------------------------------------------------- #fig_pie = px.pie(data_frame=df, names='Genre', values='Japan Sales') #fig_pie = px.pie(data_frame=df, names='Genre', values='North American Sales') #fig_pie.show() #fig_bar = px.bar(data_frame=df, x='Genre', y='Japan Sales') #fig_bar.show() fig_hist = px.histogram(data_frame=df, x='Year', y='Japan Sales') #fig_hist.show() # Interactive Graphs with Dash (Python, R, Julia) # ----------------------------------------------------------------- import dash_core_components as dcc import dash_html_components as html from dash.dependencies import Output, Input app = dash.Dash(__name__) app.layout = html.Div([ html.H1("Graph Analysis with Charming Data"), dcc.Dropdown(id='genre-choice', options=[{
import pytest


@pytest.mark.skipif(
    not hasattr(pd.options.plotting, "backend"),
    reason="Currently installed pandas doesn't support plotting backends.",
)
@pytest.mark.parametrize(
    "pandas_fn,px_fn",
    [
        (lambda df: df.plot(), px.line),
        (
            lambda df: df.plot.scatter("A", "B"),
            lambda df: px.scatter(df, "A", "B"),
        ),
        (lambda df: df.plot.line(), px.line),
        (lambda df: df.plot.area(), px.area),
        (lambda df: df.plot.bar(), px.bar),
        (lambda df: df.plot.barh(), lambda df: px.bar(df, orientation="h")),
        (lambda df: df.plot.box(), px.box),
        (lambda df: df.plot.hist(), px.histogram),
        (lambda df: df.boxplot(), px.box),
        (lambda df: df.hist(), px.histogram),
        (lambda df: df["A"].hist(), lambda df: px.histogram(df["A"])),
    ],
)
def test_pandas_equiv(pandas_fn, px_fn):
    """Check that each pandas plotting call equals its Plotly Express counterpart.

    The "plotly" plotting backend is enabled only for the duration of the
    comparison, so the previously configured backend is restored afterwards
    and cannot leak into other tests.
    """
    df = pd.DataFrame(np.random.randn(100, 4), columns=list("ABCD")).cumsum()
    with pd.option_context("plotting.backend", "plotly"):
        assert pandas_fn(df) == px_fn(df)
import dash import dash_bootstrap_components as dbc import dash_core_components as dcc import dash_html_components as html from dash.dependencies import Input, Output import pandas as pd import plotly.express as px # Imports from this application from app import app web_url = 'https://gist.githubusercontent.com/dggasque/cf034a3ce095830bd891b5af42987fb0/raw/7223d727d1aa65ad915575fe9ff46d7ee1be0f0f/mushroom_mapped.csv' df = pd.read_csv(web_url) fig1 = px.histogram(df, x="odor", color="class") # 1 column layout # https://dash-bootstrap-components.opensource.faculty.ai/l/components/layout column1 = dbc.Col([ dcc.Markdown(""" ## Process ### About the Data The data for this project was originally donated to the UCI Machine Learning Repository in 1987. Mushroom records are derived from *The Audubon Society Feild Guide to North American Mushrooms*(1981). The guide identifies each species of mushroom as edible, definitely poisonous, or of unknown edibility. The data set combines the latter class into the class poisonous. There are 22 categorical features that can be used to predict whether a mushroom is edible or poisonous.
#!/usr/bin/env python # coding: utf-8 # In[ ]: from jupyter_dash import jupyter_dash import dash_core_components as dcc import dash_html_components as html from dash.dependencies import Input, Output import plotly.express as px import pandas as pd df_pu = pd.read_csv("df_pu.csv") fig = px.histogram( df_pu, x="City", y="Sales_pu", histfunc="sum").update_layout(xaxis={'categoryorder': 'sum descending'}) y_data = ["Sales_pu", "Profit_pu", "Discount", "Quantity"] x_data = [ 'Ship Mode', 'Segment', 'City', 'State', 'Region', 'Category', 'Sub-Category' ] functions = ["sum", "min", "max", "avg"] app = dash.Dash(__name__) server = app.server app.layout = html.Div( [ html.H1(children=" Interactive Visualization Dashboard", style={
def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame, column_mapping):
    """Build the per-feature classification table widget and store it in ``self.wi``.

    For every selected feature the widget gets one "All" histogram coloured by
    the target column, plus one histogram per prediction label coloured by the
    confusion outcome (TP / TN / FP / FN) of that label. When production data
    is given, reference and production rows are concatenated and shown side by
    side through a ``dataset`` facet; otherwise only the reference data is
    plotted.

    Args:
        reference_data: Reference dataset.
        production_data: Production dataset, or ``None`` to plot the
            reference data only.
        column_mapping: Optional mapping read with the keys ``'target'``,
            ``'prediction'``, ``'numerical_features'`` and
            ``'categorical_features'``. When falsy, the columns are inferred
            from ``reference_data``.

    Side effects:
        Sets ``self.wi`` to a ``BaseWidgetInfo``, or to ``None`` when the
        target or the prediction column is unknown. The input frames are left
        untouched: helper columns are added to copies only.
    """

    def numeric_only(names):
        # Missing list -> no features; otherwise keep numeric-dtype columns.
        if names is None:
            return []
        return [name for name in names if is_numeric_dtype(reference_data[name])]

    if column_mapping:
        target_column = column_mapping.get('target')
        prediction_column = column_mapping.get('prediction')
        num_feature_names = numeric_only(column_mapping.get('numerical_features'))
        # NOTE(review): categorical features are filtered by numeric dtype as
        # well, exactly as before - confirm that this is intended.
        cat_feature_names = numeric_only(column_mapping.get('categorical_features'))
    else:
        known_columns = reference_data.columns
        date_column = 'datetime' if 'datetime' in known_columns else None
        target_column = 'target' if 'target' in known_columns else None
        prediction_column = 'prediction' if 'prediction' in known_columns else None
        utility_columns = {date_column, target_column, prediction_column}
        num_feature_names = list(
            set(reference_data.select_dtypes([np.number]).columns) - utility_columns)
        # The `np.object` alias was removed from NumPy; builtin `object`
        # selects the same columns.
        cat_feature_names = list(
            set(reference_data.select_dtypes([object]).columns) - utility_columns)

    if prediction_column is None or target_column is None:
        self.wi = None
        return

    feature_names = num_feature_names + cat_feature_names
    # `prediction_column` is used both to select columns and as the list of
    # labels (presumably one score column per class - confirm with callers).
    labels = prediction_column

    def with_prediction_labels(frame, **extra_columns):
        # Row-wise argmax over the prediction columns gives the predicted
        # label; assign() returns a new frame, so the caller's data is kept.
        label_ids = np.argmax(frame[prediction_column].to_numpy(), axis=-1)
        return frame.assign(
            prediction_labels=[prediction_column[i] for i in label_ids],
            **extra_columns)

    def graph_info(graph_id, figure):
        # Serialise a figure into the widget's additional-graph payload.
        fig_json = json.loads(figure.to_json())
        return AdditionalGraphInfo(
            graph_id,
            {
                "data": fig_json['data'],
                "layout": fig_json['layout']
            },
        )

    confusion_order = ["TP", "TN", "FP", "FN"]
    if production_data is not None:
        plot_data = pd.concat([
            with_prediction_labels(reference_data, dataset='Reference'),
            with_prediction_labels(production_data, dataset='Production'),
        ])
        dataset_order = {"dataset": ["Reference", "Production"]}
        all_kwargs = {"facet_col": "dataset", "category_orders": dataset_order}
        confusion_kwargs = {
            "facet_col": "dataset",
            "category_orders": {**dataset_order, "Confusion": confusion_order},
        }
    else:
        plot_data = with_prediction_labels(reference_data)
        all_kwargs = {}
        confusion_kwargs = {"category_orders": {"Confusion": confusion_order}}

    # The confusion outcome depends on the label only, not on the feature, so
    # it is computed once per label instead of once per (feature, label) pair.
    target_values = plot_data[target_column]
    predicted_values = plot_data['prediction_labels']
    confusion_by_label = []
    for label in labels:
        is_actual = (target_values == label).to_numpy()
        is_predicted = (predicted_values == label).to_numpy()
        # First matching condition wins: TP, then FP, then FN, otherwise TN.
        confusion_by_label.append((label, np.select(
            [is_actual & is_predicted, is_predicted, is_actual],
            ['TP', 'FP', 'FN'],
            default='TN')))

    additional_graphs_data = []
    params_data = []
    for feature_name in feature_names:
        # The id must be unique per feature; a shared "All" id would make
        # every row of the table point at the same graph.
        all_graph_id = "All_" + str(feature_name)

        # add data for table in params
        params_data.append({
            "details": {
                "parts": [{
                    "title": "All",
                    "id": all_graph_id
                }] + [{
                    "title": str(label),
                    "id": feature_name + "_" + str(label)
                } for label in labels],
                "insights": []
            },
            "f1": feature_name
        })

        # overall distribution coloured by the target
        additional_graphs_data.append(graph_info(
            all_graph_id,
            px.histogram(plot_data,
                         x=feature_name,
                         color=target_column,
                         histnorm='',
                         **all_kwargs)))

        # one confusion-coloured distribution per label
        for label, confusion in confusion_by_label:
            plot_data['Confusion'] = confusion
            additional_graphs_data.append(graph_info(
                feature_name + "_" + str(label),
                px.histogram(plot_data,
                             x=feature_name,
                             color='Confusion',
                             histnorm='',
                             **confusion_kwargs)))

    self.wi = BaseWidgetInfo(
        title=self.title,
        type="big_table",
        details="",
        alertStats=AlertStats(),
        alerts=[],
        alertsPosition="row",
        insights=[],
        size=2,
        params={
            "rowsPerPage": min(len(feature_names), 10),
            "columns": [{
                "title": "Feature",
                "field": "f1"
            }],
            "data": params_data
        },
        additionalGraphs=additional_graphs_data)
def histogram(self, **kwargs):
    """Return a Plotly Express histogram of ``self.df``.

    All keyword arguments are forwarded unchanged to ``px.histogram``.
    """
    figure = px.histogram(data_frame=self.df, **kwargs)
    return figure
def main():
    """Render the Streamlit app: data preview, price histogram and price prediction.

    Loads the data with ``get_data()`` and the model with ``train_model()``,
    shows a column selector with the first ten rows, a histogram of ``MEDV``
    restricted to a slider-selected range, and a sidebar form whose values are
    passed to ``model.predict`` when the button is pressed.
    """
    # load the dataframe
    data = get_data()

    # train the model
    model = train_model()

    # title
    st.title(
        "Data App - Prediciendo el Valor de Inmuebles de la Ciudad de Boston")

    # subtitle
    st.info(
        "Este es un App de Predicción de Machine Learning utilizado para exibir el problema de predicción de valores de inmuebles de la ciudad de Boston."
    )

    # dataset preview
    st.subheader(
        "Seleccione el conjunto de características de la base de datos")

    # columns shown by default
    defaultcols = ["RM", "PTRATIO", "LSTAT", "MEDV"]

    # let the user pick the columns with a multiselect
    cols = st.multiselect("Características",
                          data.columns.tolist(),
                          default=defaultcols)

    # show the first 10 rows of the selected columns
    st.dataframe(data[cols].head(10))

    st.subheader("Distribución de inmuebles por precio")

    # price range selected by the user
    faixa_valores = st.slider("Rango de precios", float(data.MEDV.min()), 150.,
                              (10.0, 100.0))

    # keep only the rows inside the selected range
    dados = data[data['MEDV'].between(left=faixa_valores[0],
                                      right=faixa_valores[1])]

    # plot the distribution of the filtered data
    f = px.histogram(dados,
                     x="MEDV",
                     nbins=100,
                     title="Distribución de Precios")
    f.update_xaxes(title="MEDV")
    f.update_yaxes(title="Total de Inmuebles")
    st.plotly_chart(f)

    st.sidebar.subheader("Defina los atributos del inmueble para predicción")

    # one sidebar input per model attribute
    crim = st.sidebar.number_input("Tasa de Criminalidad",
                                   value=data.CRIM.mean())
    # Default to the mean of INDUS itself; the CRIM mean used before was a
    # copy-paste slip from the previous input.
    indus = st.sidebar.number_input("Proporción de Hectares de Negócio",
                                    value=data.INDUS.mean())
    chas = st.sidebar.selectbox("Tiene límite con el río?", ("Si", "No"))

    # turn the yes/no answer into a binary value
    chas = 1 if chas == "Si" else 0

    nox = st.sidebar.number_input("Concentración de óxido nítrico",
                                  value=data.NOX.mean())
    rm = st.sidebar.number_input("Número de Cuartos", value=1)
    ptratio = st.sidebar.number_input("Índice de alunos para profesores",
                                      value=data.PTRATIO.mean())
    b = st.sidebar.number_input(
        "Proporción de personar de descendencia afro-americana",
        value=data.B.mean())
    lstat = st.sidebar.number_input("Porcentaje de status bajo",
                                    value=data.LSTAT.mean())

    # prediction button
    btn_predict = st.sidebar.button("Realizar Predicción")

    # credits
    st.sidebar.info("Desarrollado por Juan Minango")

    # run the prediction only when the button was pressed
    if btn_predict:
        result = model.predict(
            [[crim, indus, chas, nox, rm, ptratio, b, lstat]])
        st.subheader(
            "El valor previsto para el inmueble con las caracteristicas escojidas es:"
        )
        result = "US $ " + str(round(result[0] * 10, 2))
        st.write(result)
def request_resource(self, turbine):
    """
    A wind turbine has requested a vessel to fix a failure. This will either
    allocate a vessel to the turbine or let it know to wait for a period
    before it will try to allocate a vessel again.

    NOTE(review): the current body ignores `turbine` and always returns the
    first entry of `self.CTVs`; the waiting path described above is not
    implemented here.
    """
    return self.CTVs[0]


print('Wind Site')
#random.seed(RANDOM_SEED)
env = simpy.Environment()
# A single preemptive resource with capacity 1000 is the only entry in CTVs.
CTVs = [simpy.PreemptiveResource(env, capacity=1000)]
# NOTE(review): this rebinds the name `resource_manager` from the callable to
# the created instance, so the callable is no longer reachable by this name.
resource_manager = resource_manager(env, "rm1", CTVs)
turbines = [
    turbine(env, 'Turbine %d' % i, resource_manager)
    for i in range(NUM_TURBINES)
]
env.run(until=SIM_TIME)

# Per-turbine summary: `power` as a percentage of SIM_TIME, and failure count.
turbine_data = pd.DataFrame([])
turbine_data["Uptime"] = [(turbines[i].power / SIM_TIME) * 100
                          for i in range(NUM_TURBINES)]
turbine_data["Failures"] = [
    turbines[i].num_failures for i in range(NUM_TURBINES)
]
fig1 = px.histogram(turbine_data, x="Uptime", nbins=100)
fig1.show()
fig2 = px.histogram(turbine_data, x="Failures", nbins=50)
fig2.show()
# %matplotlib inline df = pd.read_csv('Heart Disease.csv') df.head() df.isnull().sum() df.value_counts('target') df.value_counts('target').iloc[0] / len(df) """Visualisation""" import plotly.express as ex fig = ex.histogram(x=df['target'], color=df['sex']) fig.show() df.corr()['target'].sort_values() plt.figure(figsize=(20, 12)) sns.countplot(x=df['age'], hue=df['target']) """Train Test Split""" X = df.drop(['target', 'sex', 'age', 'trestbps', 'chol', 'fbs', 'restecg'], axis=1) y = df['target'] from sklearn.model_selection import train_test_split X_train, X_test, y_train, y_test = train_test_split(X,
values=plot_sentiment(choice[j]).Tweets, showlegend=True), i + 1, j + 1) fig_3.update_layout(height=600, width=800) st.plotly_chart(fig_3) st.sidebar.subheader("Breakdown airline by sentiment") choice = st.sidebar.multiselect('Pick airlines', ('US Airways', 'United', 'American', 'Southwest', 'Delta', 'Virgin America'), key=0) if len(choice) > 0: choice_data = data[data.airline.isin(choice)] fig_0 = px.histogram(choice_data, x='airline', y='airline_sentiment', histfunc='count', color='airline_sentiment', facet_col='airline_sentiment', labels={'airline_sentiment': 'tweets'}, height=600, width=800) st.plotly_chart(fig_0) st.sidebar.header("Word Cloud") word_sentiment = st.sidebar.radio('Display word cloud for what sentiment?', ('positive', 'neutral', 'negative')) if not st.sidebar.checkbox("Close", False, key='3'): st.subheader('Word cloud for %s sentiment' % (word_sentiment)) df = data[data['airline_sentiment'] == word_sentiment] words = ' '.join(df['text']) processed_words = ' '.join([ word for word in words.split()
import pandas as pd
import plotly.express as px

from constants import FILE

# NOTE(review): `st` is used below but not imported in this excerpt -
# presumably `import streamlit as st` sits above; confirm.
df = pd.read_csv(FILE)

st.title('Projeto prático do tutorial da Python Brasil')
st.markdown("## Nutrition Facts for McDonald's Menu")

# Distinct values of the Category column, offered in the selectbox below.
categories = df['Category'].unique().tolist()

st.markdown("### Describe")
if st.checkbox('Apresentar describe símples'):
    st.write(df.describe())

if st.checkbox('Apresentar describe agrupado por categoria'):
    category = st.selectbox(
        'Selecione uma categoria para mostrar um describe da categoria.',
        categories)
    st.write(df.loc[df['Category'] == category].describe())

if st.checkbox('Mostrar dataframe'):
    st.dataframe(df)

if st.checkbox('Mostrar dataframe como tabela'):
    st.table(df)

# Column names only; `categories` keeps holding the Category values instead
# of being overwritten by a chained assignment.
columns = df.columns.tolist()
column = st.selectbox('Selecione uma coluna para plotar o histograma.',
                      columns)
fig = px.histogram(df, x=column)
st.plotly_chart(fig)