def get_model_div(key):
    """Render the detail view of one saved model.

    The model record is read from the ``'models'`` entry of ``db`` under
    ``key``. Its summary statistics, coefficients, error mean and ANOVA
    data are turned into tables and headings.

    Args:
        key: Name of the saved model; also concatenated into the first
            heading, so it is expected to be a string.

    Returns:
        An ``html.Div`` holding the headings and tables for the model.
    """
    saved = db.get('models')[key]

    # Read every field up front so a missing entry fails before rendering.
    summary = saved['summary']
    tag = saved['tag']
    model_type = saved['type']
    params = saved['params']
    anova = saved['anova']
    x_col = saved['x_col']
    y_col = saved['y_col']
    error_mean = saved['error_mean']

    def _as_table(frame):
        # Shared bootstrap styling for both tables on this view.
        return dbc.Table.from_dataframe(frame,
                                        striped=True,
                                        bordered=True,
                                        hover=True,
                                        style=common.table_style)

    stats_div = _as_table(common.get_stats_df(summary, x_col, y_col))
    coeff_div = _as_table(common.get_coeff_df(params, x_col))
    anova_div = common.get_anova_div(anova)

    children = [
        html.H2("Model: " + key),
        html.H2("Tag: Tag" + str(tag)),
        html.H2("Type: " + model_type),
        html.H2('Statistics Summary Table'),
        stats_div,
        html.H2('Linear Regression Coefficients'),
        coeff_div,
        html.H2('Error Mean = ' + str(round(error_mean, 4))),
        html.Br(),
        html.H2('Anova'),
        anova_div,
    ]
    return html.Div(children)
def linear_regression(n):
    """Build the layout of the linear-regression page.

    Reloads the cleaned CSV named by the ``'lr.file'`` entry of ``db``
    (falling back to ``'empty'`` when that entry is ``None``), stores it in
    the module-level ``df_cleaned`` and assembles the page components.

    Args:
        n: Not used by this function.

    Returns:
        A list of Dash components making up the page.
    """
    global df_cleaned

    file_name = db.get('lr.file')
    if file_name is None:
        file_name = 'empty'
    df_cleaned = pd.read_csv(FileUtils.path('clean', file_name))
    preview = df_cleaned.head(10).round(4)

    def column_options():
        # Build a fresh options list for each dropdown.
        return [{'label': name, 'value': name} for name in df_cleaned.columns]

    header = html.Div(children=[
        html.H2(children='Cleaned Data: ' + file_name),
        html.H2(children='Tag: Tag ' + str(db.get('tags')[file_name])),
        dbc.Table.from_dataframe(preview,
                                 striped=True,
                                 bordered=True,
                                 hover=True,
                                 style=common.table_style),
    ])

    selectors = html.Div(
        [
            html.Div(id='ordered-df', style={'display': 'none'}),
            html.Hr(),
            html.Label('Select X-axis variable for scatter plot'),
            dcc.Dropdown(id='x-var-plot',
                         options=column_options(),
                         multi=False),
            html.Label('Select Y-axis variable for scatter plot'),
            dcc.Dropdown(id='y-var-plot',
                         options=column_options(),
                         multi=False),
            html.Br(),
            html.H2('Perform Linear Regression'),
            html.Label('Select X variable from Dropdown'),
            dcc.Dropdown(id='x-var-selection',
                         options=column_options(),
                         multi=True),
            html.Label('Select Y variable from Dropdown'),
            dcc.Dropdown(id='y-var-selection',
                         options=column_options(),
                         multi=False),
        ],
        style={'width': '48%', 'display': 'inline-block'})

    scatter_panel = html.Div(
        [
            html.Label('Scatter Plot'),
            dcc.Graph(id='scatter-plot'),
        ],
        style={'width': '48%', 'float': 'right', 'display': 'inline-block'})

    results = html.Div([
        html.Div([], id='linear-regression-status'),
        html.Br(),
        html.H2('Statistics Summary Table'),
        html.Table(id='stats_table'),
        html.H2('Linear Regression Coefficients'),
        html.Table(id='coeff_table'),
        html.H2('Plot'),
    ])

    plots = html.Div([
        dcc.Graph(id='lr-y-ycap-plot', figure=y_ycap_fig),
        dcc.Graph(id='lr-error-plot', figure=error_fig),
        html.Div([], id='lr-error-mean'),
    ])

    anova_section = html.Div([
        html.Hr(),
        html.H2('ANOVA Table'),
        html.Div([], id='lr-anova-table'),
    ])

    # NOTE(review): the placeholder below reads "Model Name" although the
    # field collects prediction data - looks like a copy-paste; confirm.
    predict_section = html.Div([
        html.Hr(),
        dbc.Label('Predict Data (pass comma separated) Dependent Variables'),
        dbc.Input(id="lr-predict-data",
                  placeholder="Model Name",
                  type="text"),
        html.Br(),
        dbc.Button("Predict", color="primary", id='lr-predict'),
        html.Div([], id='lr-predict-display'),
        html.Div([], id='lr-predict-data-do-nothing'),
    ])

    save_section = html.Div([
        html.Hr(),
        dbc.Label('Save Model'),
        dbc.Input(id="lr-save-model", placeholder="Model Name", type="text"),
        html.Br(),
        dbc.Button("Save", color="primary", id='lr-save'),
        html.Div([], id='lr-save-display'),
        html.Div([], id='lr-save-model-do-nothing'),
    ])

    return [
        header,
        html.Hr(),
        html.H3(children='Variable Selection and Plotting'),
        html.Div([selectors, scatter_panel]),
        html.Hr(),
        results,
        html.Br(),
        plots,
        anova_section,
        predict_section,
        save_section,
    ]
def stats_table_and_linear_regression(json_ordered_data):
    """Fit a linear regression on the ordered data and build its outputs.

    The last column of the decoded frame is treated as the dependent
    variable; all preceding columns are the independent variables. The
    fitted model and its results are stored in ``db`` under ``lr.*`` keys.

    Args:
        json_ordered_data: JSON text in pandas ``orient='split'`` form, or
            ``None`` when no data is available yet.

    Returns:
        A 7-tuple: status message, statistics table, coefficient table,
        actual-vs-predicted figure, error figure, error-mean heading and
        ANOVA section. When there is no input or the fit fails, the tables
        are empty, the figures are the module-level defaults and the last
        two items are empty strings.
    """

    def _blank_table():
        return generate_table(pd.DataFrame(columns=[]))

    if json_ordered_data is None:
        return (common.msg(None), _blank_table(), _blank_table(),
                y_ycap_fig, error_fig, "", "")

    frame = pd.read_json(json_ordered_data, orient='split')
    columns = list(frame.columns)
    y = list(frame[columns[-1]])
    x_col = columns[:-1]
    y_col = columns[-1]
    # One list of values per independent variable.
    data = [frame[name].values.tolist() for name in x_col]

    ##Team 3 API Integration
    try:
        model = LinearRegression()
        db.put("lr.model", model)
        db.put("lr.x_col", x_col)
        db.put("lr.y_col", y_col)
        (summary, params, ycap) = model.fit(data, y)
        db.put("lr.summary", summary)
        db.put("lr.params", params)
        db.put("lr.ycap", ycap)
        error_mean = model.model_stats()['mean']
        db.put("lr.error_mean", error_mean)
    except Exception as err:
        return (common.error_msg("Linear Regression API Error: " + str(err)),
                _blank_table(), _blank_table(), y_ycap_fig, error_fig, "", "")

    def _styled_table(table_frame):
        return dbc.Table.from_dataframe(table_frame,
                                        striped=True,
                                        bordered=True,
                                        hover=True,
                                        style=common.table_style)

    table1 = _styled_table(common.get_stats_df(summary, x_col, y_col))
    table2 = _styled_table(common.get_coeff_df(params, x_col))

    n_points = len(y)
    predicted_trace = go.Scatter(x=list(range(n_points)),
                                 y=ycap,
                                 name='Y Predicted (ŷ)',
                                 line=dict(width=2,
                                           color='rgb(229, 151, 50)'))
    actual_trace = go.Scatter(x=list(range(n_points)),
                              y=y,
                              name='Y Actual',
                              line=dict(width=2, color='rgb(106, 181, 135)'))
    residuals = [actual - ycap[idx] for idx, actual in enumerate(y)]
    residual_trace = go.Scatter(x=list(range(n_points)),
                                y=residuals,
                                line=dict(width=2, color='rgb(236, 10, 15)'))

    fig1 = go.Figure(data=[predicted_trace, actual_trace],
                     layout=y_ycap_title)
    fig2 = go.Figure(data=[residual_trace], layout=error_title)
    error_mean = html.H2('Error Mean = ' +
                         str(round(db.get('lr.error_mean'), 4)))

    ##Team 5 API Integration
    anova = get_anova(y, ycap, len(params))
    db.put('lr.anova', anova)
    anova_div = common.get_anova_div(anova)

    return (
        common.success_msg("Linear Regression API Exceuted Successfully!!"),
        table1, table2, fig1, fig2, error_mean, anova_div)
def lr_save_model(n_clicks):
    """Store the current linear-regression results as a named model.

    Collects the ``lr.*`` entries from ``db`` into one record and files it
    under the model name in the ``'models'`` mapping, creating that
    mapping when it does not exist yet.

    Args:
        n_clicks: Not used by this function.

    Returns:
        A 2-tuple ``(message, "")``. ``message`` is an empty string when no
        model name is set, otherwise a success message component.
    """
    # NOTE(review): the name is read from 'cl.model_name' while every other
    # key in this function uses the 'lr.' prefix - confirm against the
    # code that writes the model name.
    model_name = db.get('cl.model_name')
    name_missing = model_name is None or model_name == ''
    if name_missing:
        return ("", "")

    stored = {
        field: db.get("lr." + field)
        for field in ("file", "model", "params", "anova", "error_mean",
                      "summary", "x_col", "y_col")
    }

    record = {
        'file': stored['file'],
        'tag': db.get('tags')[stored['file']],
        'type': 'linear',
        'name': model_name,
        'model': stored['model'],
        'params': stored['params'],
        'anova': stored['anova'],
        'summary': stored['summary'],
        'x_col': stored['x_col'],
        'y_col': stored['y_col'],
        'error_mean': stored['error_mean'],
    }

    registry = db.get('models')
    if registry is None:
        registry = {}
        db.put('models', registry)
    registry[model_name] = record

    return (common.success_msg('Model "' + model_name +
                               '" Saved Successfully.'), "")
from dash.dependencies import Input, Output, State
import dash_table
import plotly.graph_objs as go
import pandas as pd

from dataanalytics.ux.app import app
from dataanalytics.ux.apps import common
# NOTE(review): html, dcc, dbc and generate_table are used in this module
# without an explicit import - presumably they arrive through this wildcard
# import; confirm against common.
from dataanalytics.ux.apps.common import *
from dataanalytics.framework.database import db
from dataanalytics.framework.file_utils import FileUtils
from dataanalytics.framework.data_utils import DataUtils
from dataanalytics.stats_linear_regression.linear_regression import LinearRegression
from dataanalytics.stat_anova.anova import get_anova

# Load the cleaned data set at import time. When db has no 'lr.file' entry
# the file name falls back to 'empty'.
file = db.get('lr.file')
if file is None:
    file = 'empty'
path = FileUtils.path('clean', file)
df_cleaned = pd.read_csv(path)

# Default layout and empty figure for the actual-vs-predicted plot.
y_ycap_title = go.Layout(title='Actual vs Predicted Y Plot',
                         hovermode='closest',
                         xaxis={'title': 'Sequence of data points'},
                         yaxis={'title': 'y,ŷ'})
y_ycap_fig = go.Figure(data=[], layout=y_ycap_title)

# Default layout for the error plot.
# NOTE(review): functions in this module also reference `error_fig`, which
# is not defined in the visible lines - confirm it is defined elsewhere.
error_title = go.Layout(title='Error Plot',
                        hovermode='closest',
                        xaxis={'title': 'Sequence of data points'},
                        yaxis={'title': 'Error = y - ŷ'})
def hor_save_model(n_clicks):
    """Store the current higher-order regression results as a named model.

    Collects the ``hor.*`` entries from ``db`` into one record and files it
    under the model name in the ``'models'`` mapping, creating that
    mapping when it does not exist yet.

    Args:
        n_clicks: Not used by this function.

    Returns:
        A 2-tuple ``(message, "")``. ``message`` is an empty string when no
        model name is set, otherwise a success message component.
    """
    model_name = db.get('hor.model_name')
    name_missing = model_name is None or model_name == ''
    if name_missing:
        return ("", "")

    stored = {
        field: db.get("hor." + field)
        for field in ("file", "model", "order", "params", "anova",
                      "error_mean", "summary", "x_col", "y_col")
    }

    record = {
        'file': stored['file'],
        'tag': db.get('tags')[stored['file']],
        'type': 'Higher Order Polynomial',
        'name': model_name,
        'model': stored['model'],
        'order': stored['order'],
        'params': stored['params'],
        'no_of_coeff': len(stored['params']),
        'anova': stored['anova'],
        'summary': stored['summary'],
        'x_col': stored['x_col'],
        'y_col': stored['y_col'],
        'error_mean': stored['error_mean'],
    }

    registry = db.get('models')
    if registry is None:
        registry = {}
        db.put('models', registry)
    registry[model_name] = record

    return (common.success_msg('Model "' + model_name +
                               '" Saved Successfully.'), "")
def stats_table_and_hor_regression(json_ordered_data, hor_order):
    """Fit a higher-order polynomial regression and build its outputs.

    The first column of the decoded frame is the independent variable and
    the second column is the dependent variable. Rows are sorted by the
    independent variable before fitting. Results are stored in ``db``
    under ``hor.*`` keys.

    Args:
        json_ordered_data: JSON text in pandas ``orient='split'`` form, or
            ``None`` when no data is available yet.
        hor_order: Polynomial order passed to ``Building_model_equation``,
            or ``None`` when none has been chosen.

    Returns:
        An 8-tuple: status message, statistics table, coefficient table,
        actual-vs-predicted figure, error figure, error-mean heading,
        ANOVA section and a heading naming the independent variable. When
        an input is missing or the fit fails, the tables are empty, the
        figures are the module-level defaults and the last three items are
        empty strings.
    """

    def _blank_table():
        return generate_table(pd.DataFrame(columns=[]))

    if json_ordered_data is None or hor_order is None:
        return (common.msg(None), _blank_table(), _blank_table(),
                y_ycap_fig, error_fig, "", "", "")

    dff = pd.read_json(json_ordered_data, orient='split')
    col = list(dff.columns)
    x_col = [col[0]]
    y_col = col[1]
    dff = dff.sort_values(by=x_col)
    x = list(dff[col[0]])
    y = list(dff[col[1]])

    ##Team 4 API Integration
    try:
        db.put("hor.x_col", x_col)
        db.put("hor.y_col", y_col)
        (ycap, params) = Building_model_equation(x, y, hor_order)
        params = params.tolist()
        db.put("hor.params", params)
        db.put("hor.ycap", ycap)
        db.put("hor.order", hor_order)

        # Mean residual. The sum starts at zero; a non-zero start would
        # shift the reported mean by start / len(y).
        residuals = [actual - ycap[idx] for idx, actual in enumerate(y)]
        error_mean = sum(residuals, 0.0) / len(y)
        db.put("hor.error_mean", error_mean)

        ## Team 3 API call for Summary Statistics
        model = LinearRegression()
        (summary, params_ignore, ycap_ignore) = model.fit([x], y)
        db.put("hor.summary", summary)
    except Exception as e:
        return (common.error_msg("Higher Order Regression API Error: " +
                                 str(e)), _blank_table(), _blank_table(),
                y_ycap_fig, error_fig, "", "", "")

    df_stats = common.get_stats_df(summary, x_col, y_col)
    table1 = dbc.Table.from_dataframe(df_stats,
                                      striped=True,
                                      bordered=True,
                                      hover=True,
                                      style=common.table_style)

    df_coeff = common.hor_get_coeff_df(params)
    table2 = dbc.Table.from_dataframe(df_coeff,
                                      striped=True,
                                      bordered=True,
                                      hover=True,
                                      style=common.table_style)

    trace_actual = go.Scatter(x=x,
                              y=y,
                              name='Y Actual',
                              mode='markers',
                              marker=dict(color='rgb(106, 181, 135)'))
    trace_predict = go.Scatter(x=x,
                               y=ycap,
                               name='Y Predicted (ŷ)',
                               line=dict(width=2, color='rgb(229, 151, 50)'))
    trace_error = go.Scatter(x=x,
                             y=residuals,
                             line=dict(width=2, color='rgb(236, 10, 15)'))

    x_title = "x (" + str(x_col[0]) + ")"
    y_title = "y,ŷ(" + str(y_col) + ")"

    # Local layouts: axis titles name the selected columns.
    fit_layout = go.Layout(title='Actual vs Predicted Y Plot',
                           hovermode='closest',
                           xaxis={'title': x_title},
                           yaxis={'title': y_title})
    error_layout = go.Layout(title='Error Plot',
                             hovermode='closest',
                             xaxis={'title': x_title},
                             yaxis={'title': 'Error = y - ŷ'})

    fig1 = go.Figure(data=[trace_predict, trace_actual], layout=fit_layout)
    fig2 = go.Figure(data=[trace_error], layout=error_layout)
    error_mean = html.H2('Error Mean = ' +
                         str(round(db.get('hor.error_mean'), 4)))

    ##Team 5 API Integration
    anova = get_anova(y, ycap, len(params))
    db.put('hor.anova', anova)
    anova_div = common.get_anova_div(anova)

    independent_var = ','.join(x_col)

    return (common.success_msg(
        "Higher Order Regression API Exceuted Successfully!!"), table1,
            table2, fig1, fig2, error_mean, anova_div,
            html.H2(independent_var))
import dash_table
import plotly.graph_objs as go
import pandas as pd

from dataanalytics.ux.app import app
from dataanalytics.ux.apps import common
# NOTE(review): html, dcc, dbc and generate_table are used in this module
# without an explicit import - presumably they arrive through this wildcard
# import; confirm against common.
from dataanalytics.ux.apps.common import *
from dataanalytics.framework.database import db
from dataanalytics.framework.file_utils import FileUtils
from dataanalytics.framework.data_utils import DataUtils
# NOTE(review): Building_model_equation is presumably provided by this
# wildcard import; confirm against the module.
from dataanalytics.higher_order_regression.higher_order_polynomial_V1 import *
from dataanalytics.stats_linear_regression.linear_regression import LinearRegression
from dataanalytics.stat_anova.anova import get_anova

# Load the cleaned data set at import time. When db has no 'hor.file' entry
# the file name falls back to 'empty'.
file = db.get('hor.file')
if file is None:
    file = 'empty'
path = FileUtils.path('clean', file)
df_cleaned = pd.read_csv(path)

# Default layout and empty figure for the actual-vs-predicted plot.
y_ycap_title = go.Layout(title='Actual vs Predicted Y Plot',
                         hovermode='closest',
                         xaxis={'title': 'Sequence of data points'},
                         yaxis={'title': 'y,ŷ'})
y_ycap_fig = go.Figure(data=[], layout=y_ycap_title)

# Default layout for the error plot.
# NOTE(review): functions in this module also reference `error_fig`, which
# is not defined in the visible lines - confirm it is defined elsewhere.
error_title = go.Layout(title='Error Plot',
                        hovermode='closest',
                        xaxis={'title': 'Sequence of data points'},
                        yaxis={'title': 'Error = y - ŷ'})
def stats_table_and_linear_regression(json_ordered_data):
    """Fit a linear regression on the ordered data and build its outputs.

    The last column of the decoded frame is the dependent variable; all
    preceding columns are independent variables. With exactly one
    independent variable the rows are sorted by it and the plots use its
    values on the x axis; otherwise the x axis is the row sequence. The
    fitted model and its results are stored in ``db`` under ``lr.*`` keys.

    Args:
        json_ordered_data: JSON text in pandas ``orient='split'`` form, or
            ``None`` when no data is available yet.

    Returns:
        An 8-tuple: status message, statistics table, coefficient table,
        actual-vs-predicted figure, error figure, error-mean heading,
        ANOVA section and a heading listing the independent variables.
        When there is no input or the fit fails, the tables are empty, the
        figures are the module-level defaults and the last three items are
        empty strings.
    """

    def _blank_table():
        return generate_table(pd.DataFrame(columns=[]))

    if json_ordered_data is None:
        return (common.msg(None), _blank_table(), _blank_table(),
                y_ycap_fig, error_fig, "", "", "")

    frame = pd.read_json(json_ordered_data, orient='split')
    columns = list(frame.columns)
    x_col = columns[:-1]
    y_col = columns[-1]
    if len(x_col) == 1:
        frame = frame.sort_values(by=x_col)

    # One list of values per independent variable.
    data = [frame[name].values.tolist() for name in x_col]
    y = list(frame[columns[-1]])

    ##Team 3 API Integration
    try:
        model = LinearRegression()
        db.put("lr.model", model)
        db.put("lr.x_col", x_col)
        db.put("lr.y_col", y_col)
        (summary, params, ycap) = model.fit(data, y)
        db.put("lr.summary", summary)
        db.put("lr.params", params)
        db.put("lr.ycap", ycap)
        error_mean = model.model_stats()['mean']
        db.put("lr.error_mean", error_mean)
    except Exception as err:
        return (common.error_msg("Linear Regression API Error: " + str(err)),
                _blank_table(), _blank_table(), y_ycap_fig, error_fig, "",
                "", "")

    def _styled_table(table_frame):
        return dbc.Table.from_dataframe(table_frame,
                                        striped=True,
                                        bordered=True,
                                        hover=True,
                                        style=common.table_style)

    table1 = _styled_table(common.get_stats_df(summary, x_col, y_col))
    table2 = _styled_table(common.get_coeff_df(params, x_col))

    if len(data) != 1:
        # Several (or no) predictors: plot against the row sequence.
        trace_x = list(range(len(y)))
        x_title = 'Sequence of data points'
        trace_actual = go.Scatter(x=trace_x,
                                  y=y,
                                  name='Y Actual',
                                  line=dict(width=2,
                                            color='rgb(106, 181, 135)'))
    else:
        # Single predictor: plot against its own values.
        trace_x = data[0]
        x_title = "x (" + str(x_col[0]) + ")"
        trace_actual = go.Scatter(x=trace_x,
                                  y=y,
                                  name='Y Actual',
                                  mode='markers',
                                  marker=dict(color='rgb(106, 181, 135)'))

    trace_predict = go.Scatter(x=trace_x,
                               y=ycap,
                               name='Y Predicted (ŷ)',
                               line=dict(width=2, color='rgb(229, 151, 50)'))
    residuals = [actual - ycap[idx] for idx, actual in enumerate(y)]
    trace_error = go.Scatter(x=trace_x,
                             y=residuals,
                             line=dict(width=2, color='rgb(236, 10, 15)'))

    y_title = "y,ŷ(" + str(y_col) + ")"

    fit_layout = go.Layout(title='Actual vs Predicted Y Plot',
                           hovermode='closest',
                           xaxis={'title': x_title},
                           yaxis={'title': y_title})
    error_layout = go.Layout(title='Error Plot',
                             hovermode='closest',
                             xaxis={'title': x_title},
                             yaxis={'title': 'Error = y - ŷ'})

    fig1 = go.Figure(data=[trace_predict, trace_actual], layout=fit_layout)
    fig2 = go.Figure(data=[trace_error], layout=error_layout)
    error_mean = html.H2('Error Mean = ' +
                         str(round(db.get('lr.error_mean'), 4)))

    ##Team 5 API Integration
    anova = get_anova(y, ycap, len(params))
    db.put('lr.anova', anova)
    anova_div = common.get_anova_div(anova)

    independent_var = ','.join(x_col)

    return (
        common.success_msg("Linear Regression API Exceuted Successfully!!"),
        table1, table2, fig1, fig2, error_mean, anova_div,
        html.H2(independent_var))