Exemple #1
0
def get_model_div(key):
    """Render the stored details of the saved model registered under ``key``.

    The model record is read from ``db.get('models')[key]``. Its summary,
    coefficients, error mean and ANOVA data are turned into display
    components.

    Args:
        key: Name of the saved model; it is also concatenated into the
            "Model: " heading.

    Returns:
        An ``html.Div`` containing, in order: the model/tag/type headings,
        the statistics table, the coefficient table, the error mean rounded
        to four decimals, a line break and the ANOVA section.
    """
    record = db.get('models')[key]

    # Pull the fields in a fixed order so a missing field fails on the same
    # key regardless of how the record was assembled.
    (summary, tag, model_type, params, anova, x_col, y_col, error_mean) = (
        record[field]
        for field in ('summary', 'tag', 'type', 'params', 'anova', 'x_col',
                      'y_col', 'error_mean'))

    def as_table(frame):
        # Shared table styling for both data frames shown on this view.
        return dbc.Table.from_dataframe(frame,
                                        striped=True,
                                        bordered=True,
                                        hover=True,
                                        style=common.table_style)

    stats_div = as_table(common.get_stats_df(summary, x_col, y_col))
    coeff_div = as_table(common.get_coeff_df(params, x_col))
    anova_div = common.get_anova_div(anova)

    children = [
        html.H2("Model: " + key),
        html.H2("Tag: Tag" + str(tag)),
        html.H2("Type: " + model_type),
        html.H2('Statistics Summary Table'),
        stats_div,
        html.H2('Linear Regression Coefficients'),
        coeff_div,
        html.H2('Error Mean = ' + str(round(error_mean, 4))),
        html.Br(),
        html.H2('Anova'),
        anova_div,
    ]
    return html.Div(children)
def linear_regression(n):
    """Build the linear-regression page for the currently selected file.

    Reads the file name stored under ``lr.file`` in ``db`` (using ``'empty'``
    when nothing is stored), loads that CSV from the ``clean`` location into
    the module-level ``df_cleaned`` and lays out the page components.

    Args:
        n: Not referenced by the body.

    Returns:
        A list of components in display order: a 10-row preview of the data
        with its tag, the variable selectors beside the scatter plot, the
        regression status and result tables, the result plots, the ANOVA
        section, the prediction form and the save-model form.
    """
    global df_cleaned

    file_name = db.get('lr.file')
    if file_name is None:
        file_name = 'empty'
    df_cleaned = pd.read_csv(FileUtils.path('clean', file_name))
    preview = df_cleaned.head(10).round(4)
    columns = df_cleaned.columns

    def column_dropdown(component_id, allow_multiple):
        # Every dropdown offers one option per column of the loaded data.
        choices = [{'label': name, 'value': name} for name in columns]
        return dcc.Dropdown(id=component_id,
                            options=choices,
                            multi=allow_multiple)

    data_section = html.Div(children=[
        html.H2(children='Cleaned Data: ' + file_name),
        html.H2(children='Tag: Tag ' + str(db.get('tags')[file_name])),
        dbc.Table.from_dataframe(preview,
                                 striped=True,
                                 bordered=True,
                                 hover=True,
                                 style=common.table_style),
    ])

    selector_panel = html.Div(
        [
            # Hidden element; only its id and display style are set here.
            html.Div(id='ordered-df', style={'display': 'none'}),
            html.Hr(),
            html.Label('Select X-axis variable for scatter plot'),
            column_dropdown('x-var-plot', False),
            html.Label('Select Y-axis variable for scatter plot'),
            column_dropdown('y-var-plot', False),
            html.Br(),
            html.H2('Perform Linear Regression'),
            html.Label('Select X variable from Dropdown'),
            column_dropdown('x-var-selection', True),
            html.Label('Select Y variable from Dropdown'),
            column_dropdown('y-var-selection', False),
        ],
        style={
            'width': '48%',
            'display': 'inline-block'
        })

    scatter_panel = html.Div(
        [
            html.Label('Scatter Plot'),
            dcc.Graph(id='scatter-plot'),
        ],
        style={
            'width': '48%',
            'float': 'right',
            'display': 'inline-block'
        })

    results_section = html.Div([
        html.Div([], id='linear-regression-status'),
        html.Br(),
        html.H2('Statistics Summary Table'),
        html.Table(id='stats_table'),
        html.H2('Linear Regression Coefficients'),
        html.Table(id='coeff_table'),
        html.H2('Plot'),
    ])

    # Both graphs start from the module-level placeholder figures.
    plots_section = html.Div([
        dcc.Graph(id='lr-y-ycap-plot', figure=y_ycap_fig),
        dcc.Graph(id='lr-error-plot', figure=error_fig),
        html.Div([], id='lr-error-mean'),
    ])

    anova_section = html.Div([
        html.Hr(),
        html.H2('ANOVA Table'),
        html.Div([], id='lr-anova-table'),
    ])

    # NOTE(review): the placeholder on the predict input reads "Model Name",
    # the same text as the save input below - looks copy-pasted; confirm.
    predict_section = html.Div([
        html.Hr(),
        dbc.Label('Predict Data (pass comma separated) Dependent Variables'),
        dbc.Input(id="lr-predict-data",
                  placeholder="Model Name",
                  type="text"),
        html.Br(),
        dbc.Button("Predict", color="primary", id='lr-predict'),
        html.Div([], id='lr-predict-display'),
        html.Div([], id='lr-predict-data-do-nothing'),
    ])

    save_section = html.Div([
        html.Hr(),
        dbc.Label('Save Model'),
        dbc.Input(id="lr-save-model", placeholder="Model Name", type="text"),
        html.Br(),
        dbc.Button("Save", color="primary", id='lr-save'),
        html.Div([], id='lr-save-display'),
        html.Div([], id='lr-save-model-do-nothing'),
    ])

    return [
        data_section,
        html.Hr(),
        html.H3(children='Variable Selection and Plotting'),
        html.Div([selector_panel, scatter_panel]),
        html.Hr(),
        results_section,
        html.Br(),
        plots_section,
        anova_section,
        predict_section,
        save_section,
    ]
def stats_table_and_linear_regression(json_ordered_data):
    """Fit a linear regression on the ordered data and build the result views.

    The last column of the decoded frame is used as ``y``; every other column
    is an ``x`` variable. The fitted model, column names, summary,
    parameters, predictions, error mean and ANOVA result are stored in ``db``
    under ``lr.*`` keys.

    Args:
        json_ordered_data: JSON text readable by ``pd.read_json`` with
            ``orient='split'``, or ``None`` when no data is available.

    Returns:
        A 7-tuple: status message, statistics table, coefficient table,
        actual-vs-predicted figure, error figure, error-mean heading and
        ANOVA section. When the input is ``None`` or the fit raises, the
        tables are empty, the figures are the module-level placeholders and
        the last two items are empty strings.
    """

    def blank_table():
        return generate_table(pd.DataFrame(columns=[]))

    def styled_table(frame):
        return dbc.Table.from_dataframe(frame,
                                        striped=True,
                                        bordered=True,
                                        hover=True,
                                        style=common.table_style)

    if json_ordered_data is None:
        return (common.msg(None), blank_table(), blank_table(), y_ycap_fig,
                error_fig, "", "")

    dff = pd.read_json(json_ordered_data, orient='split')
    columns = list(dff.columns)
    y = list(dff[columns[-1]])
    x_col, y_col = columns[:-1], columns[-1]
    # One list of values per independent variable.
    data = [dff[name].values.tolist() for name in x_col]

    ##Team 3 API Integration
    try:
        model = LinearRegression()
        # The model and column names are stored before fitting.
        db.put("lr.model", model)
        db.put("lr.x_col", x_col)
        db.put("lr.y_col", y_col)
        summary, params, ycap = model.fit(data, y)
        db.put("lr.summary", summary)
        db.put("lr.params", params)
        db.put("lr.ycap", ycap)
        db.put("lr.error_mean", model.model_stats()['mean'])
    except Exception as err:
        failure = common.error_msg("Linear Regression API Error: " + str(err))
        return (failure, blank_table(), blank_table(), y_ycap_fig, error_fig,
                "", "")

    table1 = styled_table(common.get_stats_df(summary, x_col, y_col))
    table2 = styled_table(common.get_coeff_df(params, x_col))

    def positions():
        # Points are plotted against their sequence index.
        return list(range(len(y)))

    predicted_trace = go.Scatter(x=positions(),
                                 y=ycap,
                                 name='Y Predicted (ŷ)',
                                 line=dict(width=2,
                                           color='rgb(229, 151, 50)'))
    actual_trace = go.Scatter(x=positions(),
                              y=y,
                              name='Y Actual',
                              line=dict(width=2, color='rgb(106, 181, 135)'))
    residuals = [actual - ycap[idx] for idx, actual in enumerate(y)]
    error_trace = go.Scatter(x=positions(),
                             y=residuals,
                             line=dict(width=2, color='rgb(236, 10, 15)'))

    fig1 = go.Figure(data=[predicted_trace, actual_trace],
                     layout=y_ycap_title)
    fig2 = go.Figure(data=[error_trace], layout=error_title)
    error_heading = html.H2('Error Mean = ' +
                            str(round(db.get('lr.error_mean'), 4)))

    ##Team 5 API Integration
    anova = get_anova(y, ycap, len(params))
    db.put('lr.anova', anova)
    anova_div = common.get_anova_div(anova)

    success = common.success_msg(
        "Linear Regression API Exceuted Successfully!!")
    return (success, table1, table2, fig1, fig2, error_heading, anova_div)
def lr_save_model(n_clicks):
    """Register the current linear-regression results as a named model.

    Collects the ``lr.*`` values held in ``db`` into one record and stores it
    in the ``models`` dictionary under the chosen model name, creating that
    dictionary in ``db`` when it does not exist yet.

    Args:
        n_clicks: Not referenced by the body.

    Returns:
        ``("", "")`` when no model name is set; otherwise a 2-tuple of a
        success message and an empty string.
    """
    # NOTE(review): the name is read from 'cl.model_name' while every other
    # key here uses the 'lr.' prefix - confirm against the code that stores it.
    model_name = db.get('cl.model_name')
    if model_name is None or model_name == '':
        return ("", "")

    stored = {
        field: db.get('lr.' + field)
        for field in ('file', 'model', 'params', 'anova', 'error_mean',
                      'summary', 'x_col', 'y_col')
    }
    record = {
        'file': stored['file'],
        'tag': db.get('tags')[stored['file']],
        'type': 'linear',
        'name': model_name,
        'model': stored['model'],
        'params': stored['params'],
        'anova': stored['anova'],
        'summary': stored['summary'],
        'x_col': stored['x_col'],
        'y_col': stored['y_col'],
        'error_mean': stored['error_mean'],
    }

    registry = db.get('models')
    if registry is None:
        # First saved model: create the registry and store it in db.
        registry = {}
        db.put('models', registry)
    registry[model_name] = record

    confirmation = 'Model "' + model_name + '" Saved Successfully.'
    return (common.success_msg(confirmation), "")
from dash.dependencies import Input, Output, State
import dash_table
import plotly.graph_objs as go

import pandas as pd

from dataanalytics.ux.app import app
from dataanalytics.ux.apps import common
from dataanalytics.ux.apps.common import *
from dataanalytics.framework.database import db
from dataanalytics.framework.file_utils import FileUtils
from dataanalytics.framework.data_utils import DataUtils
from dataanalytics.stats_linear_regression.linear_regression import LinearRegression
from dataanalytics.stat_anova.anova import get_anova

# Load the cleaned CSV named under 'lr.file' at import time; 'empty' is used
# when no file has been stored.
file = db.get('lr.file')
file = 'empty' if file is None else file
path = FileUtils.path('clean', file)
df_cleaned = pd.read_csv(path)

# Layout and empty figure for the actual-vs-predicted plot.
y_ycap_title = go.Layout(
    title='Actual vs Predicted Y Plot',
    hovermode='closest',
    xaxis=dict(title='Sequence of data points'),
    yaxis=dict(title='y,ŷ'),
)
y_ycap_fig = go.Figure(data=[], layout=y_ycap_title)

# Layout for the error (residual) plot.
error_title = go.Layout(
    title='Error Plot',
    hovermode='closest',
    xaxis=dict(title='Sequence of data points'),
    yaxis=dict(title='Error = y - ŷ'),
)
Exemple #6
0
def hor_save_model(n_clicks):
    """Register the current higher-order regression results as a named model.

    Collects the ``hor.*`` values held in ``db`` into one record and stores
    it in the ``models`` dictionary under the chosen model name, creating
    that dictionary in ``db`` when it does not exist yet.

    Args:
        n_clicks: Not referenced by the body.

    Returns:
        ``("", "")`` when no model name is set; otherwise a 2-tuple of a
        success message and an empty string.
    """
    model_name = db.get('hor.model_name')
    if model_name is None or model_name == '':
        return ("", "")

    stored = {
        field: db.get('hor.' + field)
        for field in ('file', 'model', 'order', 'params', 'anova',
                      'error_mean', 'summary', 'x_col', 'y_col')
    }
    record = {
        'file': stored['file'],
        'tag': db.get('tags')[stored['file']],
        'type': 'Higher Order Polynomial',
        'name': model_name,
        'model': stored['model'],
        'order': stored['order'],
        'params': stored['params'],
        'no_of_coeff': len(stored['params']),
        'anova': stored['anova'],
        'summary': stored['summary'],
        'x_col': stored['x_col'],
        'y_col': stored['y_col'],
        'error_mean': stored['error_mean'],
    }

    registry = db.get('models')
    if registry is None:
        # First saved model: create the registry and store it in db.
        registry = {}
        db.put('models', registry)
    registry[model_name] = record

    confirmation = 'Model "' + model_name + '" Saved Successfully.'
    return (common.success_msg(confirmation), "")
Exemple #7
0
def stats_table_and_hor_regression(json_ordered_data, hor_order):
    """Fit a higher-order polynomial and build the result views.

    The first column of the decoded frame is used as ``x`` and the second as
    ``y``; rows are sorted by ``x`` before fitting. Column names, parameters,
    predictions, order, error mean, summary statistics and the ANOVA result
    are stored in ``db`` under ``hor.*`` keys.

    Args:
        json_ordered_data: JSON text readable by ``pd.read_json`` with
            ``orient='split'``, or ``None`` when no data is available.
        hor_order: Order passed to ``Building_model_equation``, or ``None``
            when none has been chosen.

    Returns:
        An 8-tuple: status message, statistics table, coefficient table,
        actual-vs-predicted figure, error figure, error-mean heading, ANOVA
        section and a heading naming the independent variable. When an input
        is ``None`` or the fit raises, the tables are empty, the figures are
        the module-level placeholders and the last three items are empty
        strings.
    """
    if json_ordered_data is None or hor_order is None:
        return (common.msg(None), generate_table(pd.DataFrame(columns=[])),
                generate_table(pd.DataFrame(columns=[])), y_ycap_fig,
                error_fig, "", "", "")
    dff = pd.read_json(json_ordered_data, orient='split')
    col = list(dff.columns)
    x_col = [col[0]]
    y_col = col[1]
    dff = dff.sort_values(by=x_col)
    x = list(dff[col[0]])
    y = list(dff[col[1]])

    ##Team 4 API Integration
    try:
        db.put("hor.x_col", x_col)
        db.put("hor.y_col", y_col)
        (ycap, params) = Building_model_equation(x, y, hor_order)
        params = params.tolist()
        db.put("hor.params", params)
        db.put("hor.ycap", ycap)
        db.put("hor.order", hor_order)

        # Residuals are computed once and reused for the error plot below.
        ydiff = [actual - ycap[i] for i, actual in enumerate(y)]
        # The sum starts at 0.0 so the mean reflects only the residuals.
        error_mean = sum(ydiff, 0.0) / len(y)
        db.put("hor.error_mean", error_mean)

        ## Team 3 API call for Summary Statistics
        model = LinearRegression()
        (summary, params_ignore, ycap_ignore) = model.fit([x], y)
        db.put("hor.summary", summary)
    except Exception as e:
        return (common.error_msg("Higher Order Regression API Error: " +
                                 str(e)),
                generate_table(pd.DataFrame(columns=[])),
                generate_table(pd.DataFrame(columns=[])), y_ycap_fig,
                error_fig, "", "", "")

    df_stats = common.get_stats_df(summary, x_col, y_col)
    table1 = dbc.Table.from_dataframe(df_stats,
                                      striped=True,
                                      bordered=True,
                                      hover=True,
                                      style=common.table_style)

    df_coeff = common.hor_get_coeff_df(params)
    table2 = dbc.Table.from_dataframe(df_coeff,
                                      striped=True,
                                      bordered=True,
                                      hover=True,
                                      style=common.table_style)

    trace_actual = go.Scatter(x=x,
                              y=y,
                              name='Y Actual',
                              mode='markers',
                              marker=dict(color='rgb(106, 181, 135)'))

    trace_predict = go.Scatter(x=x,
                               y=ycap,
                               name='Y Predicted (ŷ)',
                               line=dict(width=2, color='rgb(229, 151, 50)'))

    trace_error = go.Scatter(x=x,
                             y=ydiff,
                             line=dict(width=2, color='rgb(236, 10, 15)'))

    # Axis titles name the actual columns, so these layouts are built per
    # call instead of reusing the module-level ones.
    x_title = "x (" + str(x_col[0]) + ")"
    y_title = "y,ŷ(" + str(y_col) + ")"
    fit_layout = go.Layout(title='Actual vs Predicted Y Plot',
                           hovermode='closest',
                           xaxis={'title': x_title},
                           yaxis={'title': y_title})
    error_layout = go.Layout(title='Error Plot',
                             hovermode='closest',
                             xaxis={'title': x_title},
                             yaxis={'title': 'Error = y - ŷ'})

    fig1 = go.Figure(data=[trace_predict, trace_actual], layout=fit_layout)
    fig2 = go.Figure(data=[trace_error], layout=error_layout)
    error_heading = html.H2('Error Mean = ' +
                            str(round(db.get('hor.error_mean'), 4)))

    ##Team 5 API Integration
    anova = get_anova(y, ycap, len(params))
    db.put('hor.anova', anova)
    anova_div = common.get_anova_div(anova)

    independent_var = ','.join(x_col)

    return (common.success_msg(
        "Higher Order Regression API Exceuted Successfully!!"), table1, table2,
            fig1, fig2, error_heading, anova_div, html.H2(independent_var))
Exemple #8
0
import dash_table
import plotly.graph_objs as go

import pandas as pd

from dataanalytics.ux.app import app
from dataanalytics.ux.apps import common
from dataanalytics.ux.apps.common import *
from dataanalytics.framework.database import db
from dataanalytics.framework.file_utils import FileUtils
from dataanalytics.framework.data_utils import DataUtils
from dataanalytics.higher_order_regression.higher_order_polynomial_V1 import *
from dataanalytics.stats_linear_regression.linear_regression import LinearRegression
from dataanalytics.stat_anova.anova import get_anova

# Load the cleaned CSV named under 'hor.file' at import time; 'empty' is used
# when no file has been stored.
file = db.get('hor.file')
file = 'empty' if file is None else file
path = FileUtils.path('clean', file)
df_cleaned = pd.read_csv(path)

# Layout and empty figure for the actual-vs-predicted plot.
y_ycap_title = go.Layout(
    title='Actual vs Predicted Y Plot',
    hovermode='closest',
    xaxis=dict(title='Sequence of data points'),
    yaxis=dict(title='y,ŷ'),
)
y_ycap_fig = go.Figure(data=[], layout=y_ycap_title)

# Layout for the error (residual) plot.
error_title = go.Layout(
    title='Error Plot',
    hovermode='closest',
    xaxis=dict(title='Sequence of data points'),
    yaxis=dict(title='Error = y - ŷ'),
)
Exemple #9
0
def stats_table_and_linear_regression(json_ordered_data):
    """Fit a linear regression on the ordered data and build the result views.

    The last column of the decoded frame is used as ``y``; every other column
    is an ``x`` variable. With exactly one ``x`` variable the rows are sorted
    by it and the plots use its values on the x-axis; otherwise points are
    plotted against their sequence index. The fitted model, column names,
    summary, parameters, predictions, error mean and ANOVA result are stored
    in ``db`` under ``lr.*`` keys.

    Args:
        json_ordered_data: JSON text readable by ``pd.read_json`` with
            ``orient='split'``, or ``None`` when no data is available.

    Returns:
        An 8-tuple: status message, statistics table, coefficient table,
        actual-vs-predicted figure, error figure, error-mean heading, ANOVA
        section and a heading listing the independent variables. When the
        input is ``None`` or the fit raises, the tables are empty, the
        figures are the module-level placeholders and the last three items
        are empty strings.
    """

    def blank_table():
        return generate_table(pd.DataFrame(columns=[]))

    def styled_table(frame):
        return dbc.Table.from_dataframe(frame,
                                        striped=True,
                                        bordered=True,
                                        hover=True,
                                        style=common.table_style)

    if json_ordered_data is None:
        return (common.msg(None), blank_table(), blank_table(), y_ycap_fig,
                error_fig, "", "", "")

    dff = pd.read_json(json_ordered_data, orient='split')
    columns = list(dff.columns)
    x_col, y_col = columns[:-1], columns[-1]

    if len(x_col) == 1:
        dff = dff.sort_values(by=x_col)

    # One list of values per independent variable.
    data = [dff[name].values.tolist() for name in x_col]
    y = list(dff[columns[-1]])

    ##Team 3 API Integration
    try:
        model = LinearRegression()
        # The model and column names are stored before fitting.
        db.put("lr.model", model)
        db.put("lr.x_col", x_col)
        db.put("lr.y_col", y_col)
        summary, params, ycap = model.fit(data, y)
        db.put("lr.summary", summary)
        db.put("lr.params", params)
        db.put("lr.ycap", ycap)
        db.put("lr.error_mean", model.model_stats()['mean'])
    except Exception as err:
        failure = common.error_msg("Linear Regression API Error: " + str(err))
        return (failure, blank_table(), blank_table(), y_ycap_fig, error_fig,
                "", "", "")

    table1 = styled_table(common.get_stats_df(summary, x_col, y_col))
    table2 = styled_table(common.get_coeff_df(params, x_col))

    if len(data) == 1:
        # Single predictor: plot against its values, actual data as markers.
        trace_x = data[0]
        x_title = "x (" + str(x_col[0]) + ")"
        actual_style = dict(mode='markers',
                            marker=dict(color='rgb(106, 181, 135)'))
    else:
        # Several predictors: plot against the sequence index, as lines.
        trace_x = list(range(len(y)))
        x_title = 'Sequence of data points'
        actual_style = dict(line=dict(width=2, color='rgb(106, 181, 135)'))

    trace_actual = go.Scatter(x=trace_x, y=y, name='Y Actual', **actual_style)
    trace_predict = go.Scatter(x=trace_x,
                               y=ycap,
                               name='Y Predicted (ŷ)',
                               line=dict(width=2, color='rgb(229, 151, 50)'))

    residuals = [actual - ycap[idx] for idx, actual in enumerate(y)]
    trace_error = go.Scatter(x=trace_x,
                             y=residuals,
                             line=dict(width=2, color='rgb(236, 10, 15)'))

    y_title = "y,ŷ(" + str(y_col) + ")"
    fit_layout = go.Layout(title='Actual vs Predicted Y Plot',
                           hovermode='closest',
                           xaxis={'title': x_title},
                           yaxis={'title': y_title})
    error_layout = go.Layout(title='Error Plot',
                             hovermode='closest',
                             xaxis={'title': x_title},
                             yaxis={'title': 'Error = y - ŷ'})

    fig1 = go.Figure(data=[trace_predict, trace_actual], layout=fit_layout)
    fig2 = go.Figure(data=[trace_error], layout=error_layout)
    error_heading = html.H2('Error Mean = ' +
                            str(round(db.get('lr.error_mean'), 4)))

    ##Team 5 API Integration
    anova = get_anova(y, ycap, len(params))
    db.put('lr.anova', anova)
    anova_div = common.get_anova_div(anova)

    independent_var = ','.join(x_col)

    success = common.success_msg(
        "Linear Regression API Exceuted Successfully!!")
    return (success, table1, table2, fig1, fig2, error_heading, anova_div,
            html.H2(independent_var))