Example #1
0
def hor_save_model(n_clicks):
    """Store the current higher-order polynomial model under its name.

    The ``hor.*`` entries held in ``db`` are bundled into a single record,
    which is then placed in the ``models`` mapping keyed by the model name.

    Args:
        n_clicks: Value supplied by the triggering control; not used here.

    Returns:
        A 2-tuple whose second item is always ``""``. The first item is
        ``""`` when no model name is set, otherwise the component produced
        by ``common.success_msg``.
    """
    name = db.get('hor.model_name')
    name_missing = name is None or name == ''
    if name_missing:
        return ("", "")

    # Read every stored piece in one pass, in a fixed order.
    lookup_keys = ("hor.file", "hor.model", "hor.order", "hor.params",
                   "hor.anova", 'hor.error_mean', "hor.summary",
                   "hor.x_col", "hor.y_col")
    (source_file, fitted, degree, coefficients, anova_table, mean_error,
     stats, x_name, y_name) = [db.get(key) for key in lookup_keys]

    record = {
        'file': source_file,
        'tag': db.get('tags')[source_file],
        'type': 'Higher Order Polynomial',
        'name': name,
        'model': fitted,
        'order': degree,
        'params': coefficients,
        'no_of_coeff': len(coefficients),
        'anova': anova_table,
        'summary': stats,
        'x_col': x_name,
        'y_col': y_name,
        'error_mean': mean_error
    }

    registry = db.get('models')
    if registry is None:
        # First saved model: register the mapping before filling it.
        registry = {}
        db.put('models', registry)
    registry[name] = record

    text = 'Model "' + name + '" Saved Successfully.'
    return (common.success_msg(text), "")
Example #2
0
def display_data(value):
    """Show a preview of the selected raw file and remember the selection.

    For ``csv``/``txt`` files the head lines returned by
    ``DataUtils.read_text_head`` are listed in a two-column table. For
    ``xls``/``xlsx`` files a dropdown with the workbook's sheet names is
    shown. Any other format yields a plain "not supported" string. The file
    name and its detected format are written to ``db`` under ``"file"`` and
    ``"format"`` before returning.

    Args:
        value: Selected file name, or ``None`` to reuse the name stored in
            ``db`` under ``"file"``.

    Returns:
        ``""`` when neither ``value`` nor a stored file is available,
        otherwise a list of components (or the "not supported" string).
    """
    stored_file = db.get("file")
    if value is None:
        if stored_file is None:
            return ""
        # No fresh selection: fall back to the previously stored file.
        value = stored_file

    # Named file_format so the builtin ``format`` is not shadowed.
    file_format = FileUtils.file_format(value)
    if file_format in ('csv', 'txt'):
        path = FileUtils.path('raw', value)
        head = DataUtils.read_text_head(path)
        table_col = [
            html.Col(style={'width': "10%"}),
            html.Col(style={'width': "90%"})
        ]
        table_header = [
            html.Thead(html.Tr([html.Th("Row No"),
                                html.Th("Data")]))
        ]
        # Row numbers shown to the user are 1-based.
        rows = [
            html.Tr([html.Td(line_no), html.Td(line)])
            for line_no, line in enumerate(head, start=1)
        ]
        table_body = [html.Tbody(rows)]
        table = dbc.Table(table_col + table_header + table_body,
                          bordered=True,
                          style=common.table_style)
        div = [
            common.msg("Selected File: " + value),
            common.msg("Selected Format: " + file_format), table,
            html.Br(), csv_properties_div
        ]
    elif file_format in ('xls', 'xlsx'):
        path = FileUtils.path('raw', value)
        # Only the sheet names are needed; the context manager closes the
        # workbook instead of leaving its file handle open.
        with pd.ExcelFile(path) as workbook:
            sheets = workbook.sheet_names
        div = [
            common.msg("Selected File: " + value),
            common.msg("Selected Format: " + file_format),
            common.msg("Select Sheet:"),
            html.Div([
                dcc.Dropdown(id='xls-file-sheet',
                             options=[{
                                 'label': sheet,
                                 'value': sheet
                             } for sheet in sheets],
                             value=None,
                             multi=False)
            ],
                     style={
                         'margin': '10px',
                         'width': '50%'
                     }),
            html.Div([], id="display-xls-file")
        ]
    else:
        div = "Format Not Supported!!"

    # The selection is stored even for unsupported formats.
    db.put("file", value)
    db.put("format", file_format)
    return div
def lr_save_model(n_clicks):
    """Store the current linear-regression model under its name.

    The model name is read from the ``cl.model_name`` entry of ``db``; the
    remaining pieces come from the ``lr.*`` entries. They are bundled into
    one record that is placed in the ``models`` mapping keyed by the name.

    Args:
        n_clicks: Value supplied by the triggering control; not used here.

    Returns:
        A 2-tuple whose second item is always ``""``. The first item is
        ``""`` when no model name is set, otherwise the component produced
        by ``common.success_msg``.
    """
    name = db.get('cl.model_name')
    name_missing = name is None or name == ''
    if name_missing:
        return ("", "")

    # Read every stored piece in one pass, in a fixed order.
    lookup_keys = ("lr.file", "lr.model", "lr.params", "lr.anova",
                   'lr.error_mean', "lr.summary", "lr.x_col", "lr.y_col")
    (source_file, fitted, coefficients, anova_table, mean_error, stats,
     x_names, y_name) = [db.get(key) for key in lookup_keys]

    record = {
        'file': source_file,
        'tag': db.get('tags')[source_file],
        'type': 'linear',
        'name': name,
        'model': fitted,
        'params': coefficients,
        'anova': anova_table,
        'summary': stats,
        'x_col': x_names,
        'y_col': y_name,
        'error_mean': mean_error
    }

    registry = db.get('models')
    if registry is None:
        # First saved model: register the mapping before filling it.
        registry = {}
        db.put('models', registry)
    registry[name] = record

    text = 'Model "' + name + '" Saved Successfully.'
    return (common.success_msg(text), "")
Example #4
0
def xls_file_sheet(value):
    """Render a preview of the chosen sheet of the stored Excel file.

    The sheet name is written to ``db`` under ``"sheet"`` and the first ten
    rows of that sheet are shown in a ``dash_table.DataTable``.

    Args:
        value: Selected sheet name, or ``None`` to reuse the sheet stored in
            ``db`` under ``"sheet"``.

    Returns:
        ``[]`` when neither ``value`` nor a stored sheet is available,
        otherwise a list of components ending with ``xls_properties_div``.
    """
    file = db.get("file")
    stored_sheet = db.get("sheet")
    if value is None:
        if stored_sheet is None:
            return []
        # No fresh selection: fall back to the previously stored sheet.
        value = stored_sheet

    db.put('sheet', value)
    path = FileUtils.path('raw', file)
    # Close the workbook once the sheet has been read into memory.
    with pd.ExcelFile(path) as xls:
        df = pd.read_excel(xls, value)

    table = html.Div([
        dash_table.DataTable(
            # 'records' is the full orient name; the 'rows' abbreviation is
            # rejected by current pandas releases.
            data=df.head(10).to_dict('records'),
            columns=[{'name': i, 'id': i} for i in df.columns]
        ),
        html.Hr(),
    ])
    return [html.Br(), table, html.Br(), xls_properties_div]
Example #5
0
def apply_file_properties(n):
    """Load the stored file with the chosen properties and preview it.

    Reads the file name, format, separator and header setting from ``db``,
    loads the file through ``DataUtils``, stores the resulting frame in
    ``db`` under ``"raw_data"`` and returns a preview of its first ten rows
    together with a "Clean & Save" button.

    Args:
        n: Value supplied by the triggering control; not used here.

    Returns:
        ``None`` when no format is stored, an error component when the
        stored format is unsupported or no header choice has been made,
        otherwise a list of components.
    """
    file = db.get("file")
    file_format = db.get("format")
    sep = db.get("file_separator")
    header = db.get("file_header")

    if file_format is None:
        return None
    if file_format not in ('csv', 'txt', 'xls', 'xlsx'):
        # Without this guard an unsupported format reached the table code
        # below with no frame loaded and failed there.
        return common.error_msg('Format Not Supported!!')
    if header is None:
        return common.error_msg('Please Select Header!!')

    path = FileUtils.path('raw', file)
    if file_format in ('csv', 'txt'):
        if sep is None:
            # Default to comma and remember that choice.
            sep = ','
            db.put("file_separator", sep)
        df = DataUtils.read_csv(path, sep, header)
        msg = ("Following Properties Applied. Separator=" + sep +
               " Header=" + str(header))
    else:
        sheet = db.get("sheet")
        df = DataUtils.read_xls(path, sheet, header)
        msg = "Following Properties Applied. Header=" + str(header)

    table = dbc.Table.from_dataframe(df.head(10),
                                     striped=True,
                                     bordered=True,
                                     hover=True,
                                     style=common.table_style)
    button = dbc.Button("Clean & Save", color="primary", id='clean-save-file')
    div = [
        common.msg(msg), table,
        html.Div(
            [button, html.Br(),
             html.Div([], id="cleaned-saved-file")],
            style={
                'padding': '10px',
                'textAlign': 'center'
            })
    ]
    db.put("raw_data", df)
    return div
Example #6
0
def xls_file_sheet(value):
    """Render a preview of the chosen sheet of the stored Excel file.

    The sheet name is written to ``db`` under ``"sheet"`` and the first ten
    rows of that sheet are shown in a ``dbc.Table``.

    Args:
        value: Selected sheet name, or ``None`` to reuse the sheet stored in
            ``db`` under ``"sheet"``.

    Returns:
        ``[]`` when neither ``value`` nor a stored sheet is available,
        otherwise a list of components ending with ``xls_properties_div``.
    """
    file = db.get("file")
    stored_sheet = db.get("sheet")
    if value is None:
        if stored_sheet is None:
            return []
        # No fresh selection: fall back to the previously stored sheet.
        value = stored_sheet

    db.put('sheet', value)
    path = FileUtils.path('raw', file)
    # Close the workbook once the sheet has been read into memory.
    with pd.ExcelFile(path) as xls:
        df = pd.read_excel(xls, value)

    table = html.Div([
        dbc.Table.from_dataframe(df.head(10),
                                 striped=True,
                                 bordered=True,
                                 hover=True,
                                 style=common.table_style),
        html.Hr(),
    ])
    return [html.Br(), table, html.Br(), xls_properties_div]
Example #7
0
def file_separator(value):
    """Remember the chosen separator in ``db`` under ``"file_separator"``.

    Args:
        value: Separator to store; ``None`` leaves the stored value as is.

    Returns:
        Always ``None``.
    """
    if value is None:
        return None
    db.put("file_separator", value)
    return None
Example #8
0
"""App Main

python3 app_main
"""
# NOTE(review): `os` is not referenced in the visible part of this script.
import os
from dataanalytics.framework.file_utils import FileUtils
# These calls run before the imports further down; presumably they make sure
# the 'raw' and 'clean' data directories exist before anything reads them
# -- TODO confirm against FileUtils.mkdir.
FileUtils.mkdir('raw')
FileUtils.mkdir('clean')

from dataanalytics.framework.database import db
clean_files = FileUtils.files('clean')
# The dict is handed to db first and filled afterwards. The entries added in
# the loop reach db only if db keeps a reference to this same object rather
# than a copy -- TODO confirm against db.put.
tags = {'empty': 1}
db.put('tags', tags)
for file in clean_files:
    tags[file] = 1

# `index` is not used by name in this script; presumably it is imported for
# its import-time effects and must come after the setup above -- TODO confirm.
from dataanalytics.ux import index

if __name__ == '__main__':
    # The body is a bare empty-string expression, so nothing further happens
    # here when the module is run as a script.
    ""
Example #9
0
def rc_select_file(value):
    """Remember the selected file in ``db`` under ``"rc.file"``.

    Args:
        value: File to store; ``None`` leaves the stored value as is.

    Returns:
        Always ``None``.
    """
    if value is None:
        return None
    db.put("rc.file", value)
    return None
Example #10
0
def stats_table_and_hor_regression(json_ordered_data, hor_order):
    """Fit a higher-order polynomial and build the result components.

    The first column of the data is used as x and the second as y; rows are
    sorted by x before fitting. Intermediate results are written to ``db``
    under ``hor.*`` keys (x_col, y_col, params, ycap, order, error_mean,
    summary, anova).

    Args:
        json_ordered_data: Frame serialised as JSON with ``orient='split'``,
            or ``None`` when no data is available yet.
        hor_order: Polynomial order passed to ``Building_model_equation``,
            or ``None`` when not chosen yet.

    Returns:
        An 8-tuple: status message, statistics table, coefficient table,
        actual-vs-predicted figure, error figure, error-mean heading, ANOVA
        component and a heading naming the independent variable. When an
        input is missing or fitting fails, the tables are empty, the
        figures are the module-level ``y_ycap_fig`` / ``error_fig`` and the
        last three items are ``""``.
    """
    if json_ordered_data is None or hor_order is None:
        return (common.msg(None), generate_table(pd.DataFrame(columns=[])),
                generate_table(pd.DataFrame(columns=[])), y_ycap_fig,
                error_fig, "", "", "")
    dff = pd.read_json(json_ordered_data, orient='split')
    col = list(dff.columns)
    x_col = [col[0]]
    y_col = col[1]
    dff = dff.sort_values(by=x_col)
    x = list(dff[col[0]])
    y = list(dff[col[1]])

    ##Team 4 API Integration
    try:
        db.put("hor.x_col", x_col)
        db.put("hor.y_col", y_col)
        (ycap, params) = Building_model_equation(x, y, hor_order)
        params = params.tolist()
        db.put("hor.params", params)
        db.put("hor.ycap", ycap)
        db.put("hor.order", hor_order)

        # Mean of the residuals y - ŷ. The accumulator must start at zero;
        # a non-zero start shifts the reported mean by start / len(y).
        ydiff = [actual - fitted for actual, fitted in zip(y, ycap)]
        error_mean = sum(ydiff) / len(y)
        db.put("hor.error_mean", error_mean)

        ## Team 3 API call for Summary Statistics
        model = LinearRegression()
        (summary, params_ignore, ycap_ignore) = model.fit([x], y)
        db.put("hor.summary", summary)
    except Exception as e:
        # Any failure in the fitting APIs is reported in the UI rather than
        # raised to the caller.
        return (common.error_msg("Higher Order Regression API Error: " +
                                 str(e)),
                generate_table(pd.DataFrame(columns=[])),
                generate_table(pd.DataFrame(columns=[])), y_ycap_fig,
                error_fig, "", "", "")

    df_stats = common.get_stats_df(summary, x_col, y_col)
    table1 = dbc.Table.from_dataframe(df_stats,
                                      striped=True,
                                      bordered=True,
                                      hover=True,
                                      style=common.table_style)

    df_coeff = common.hor_get_coeff_df(params)
    table2 = dbc.Table.from_dataframe(df_coeff,
                                      striped=True,
                                      bordered=True,
                                      hover=True,
                                      style=common.table_style)

    trace_actual = go.Scatter(x=x,
                              y=y,
                              name='Y Actual',
                              mode='markers',
                              marker=dict(color='rgb(106, 181, 135)'))

    trace_predict = go.Scatter(x=x,
                               y=ycap,
                               name='Y Predicted (ŷ)',
                               line=dict(width=2, color='rgb(229, 151, 50)'))

    trace_error = go.Scatter(x=x,
                             y=ydiff,
                             line=dict(width=2, color='rgb(236, 10, 15)'))

    x_title = "x (" + str(x_col[0]) + ")"
    y_title = "y,ŷ(" + str(y_col) + ")"
    y_ycap_title = go.Layout(title='Actual vs Predicted Y Plot',
                             hovermode='closest',
                             xaxis={'title': x_title},
                             yaxis={'title': y_title})
    error_title = go.Layout(title='Error Plot',
                            hovermode='closest',
                            xaxis={'title': x_title},
                            yaxis={'title': 'Error = y - ŷ'})

    fig1 = go.Figure(data=[trace_predict, trace_actual], layout=y_ycap_title)
    fig2 = go.Figure(data=[trace_error], layout=error_title)
    error_mean_heading = html.H2('Error Mean = ' +
                                 str(round(db.get('hor.error_mean'), 4)))

    ##Team 5 API Integration
    anova = get_anova(y, ycap, len(params))
    db.put('hor.anova', anova)
    anova_div = common.get_anova_div(anova)

    independent_var = ','.join(x_col)

    return (common.success_msg(
        "Higher Order Regression API Exceuted Successfully!!"), table1, table2,
            fig1, fig2, error_mean_heading, anova_div,
            html.H2(independent_var))
Example #11
0
def cl_model_name_input(value):
    """Remember the entered model name in ``db`` under ``"cl.model_name"``.

    Args:
        value: Name to store; ``None`` leaves the stored value as is.

    Returns:
        Always ``None``.
    """
    if value is None:
        return None
    db.put("cl.model_name", value)
    return None
Example #12
0
def lr_predict_input(value):
    """Remember the prediction input in ``db`` under ``"cl.predict_data"``.

    Args:
        value: Input to store; ``None`` leaves the stored value as is.

    Returns:
        Always ``None``.
    """
    if value is None:
        return None
    db.put("cl.predict_data", value)
    return None
Example #13
0
def stats_table_and_linear_regression(json_ordered_data):
    """Fit a linear regression and build the result components.

    The last column of the data is the dependent variable; every other
    column is a predictor. Intermediate results are written to ``db`` under
    ``lr.*`` keys (model, x_col, y_col, summary, params, ycap, error_mean,
    anova). Both figures are plotted against the position of each data
    point and use the module-level ``y_ycap_title`` / ``error_title``
    layouts.

    Args:
        json_ordered_data: Frame serialised as JSON with ``orient='split'``,
            or ``None`` when no data is available yet.

    Returns:
        A 7-tuple: status message, statistics table, coefficient table,
        actual-vs-predicted figure, error figure, error-mean heading and
        ANOVA component. When the input is missing or fitting fails, the
        tables are empty, the figures are the module-level ``y_ycap_fig`` /
        ``error_fig`` and the last two items are ``""``.
    """
    if json_ordered_data is None:
        return (common.msg(None), generate_table(pd.DataFrame(columns=[])),
                generate_table(pd.DataFrame(columns=[])), y_ycap_fig,
                error_fig, "", "")

    frame = pd.read_json(json_ordered_data, orient='split')
    columns = list(frame.columns)
    y = list(frame[columns[-1]])
    x_col = columns[:-1]
    y_col = columns[-1]
    # One list of values per predictor column.
    data = [frame[name].values.tolist() for name in x_col]

    ##Team 3 API Integration
    try:
        model = LinearRegression()
        db.put("lr.model", model)
        db.put("lr.x_col", x_col)
        db.put("lr.y_col", y_col)
        (summary, params, ycap) = model.fit(data, y)
        db.put("lr.summary", summary)
        db.put("lr.params", params)
        db.put("lr.ycap", ycap)
        db.put("lr.error_mean", model.model_stats()['mean'])
    except Exception as e:
        failure = common.error_msg("Linear Regression API Error: " + str(e))
        return (failure,
                generate_table(pd.DataFrame(columns=[])),
                generate_table(pd.DataFrame(columns=[])), y_ycap_fig,
                error_fig, "", "")

    table_options = dict(striped=True,
                         bordered=True,
                         hover=True,
                         style=common.table_style)
    table1 = dbc.Table.from_dataframe(
        common.get_stats_df(summary, x_col, y_col), **table_options)
    table2 = dbc.Table.from_dataframe(common.get_coeff_df(params, x_col),
                                      **table_options)

    point_count = len(y)
    predicted_trace = go.Scatter(x=list(range(point_count)),
                                 y=ycap,
                                 name='Y Predicted (ŷ)',
                                 line=dict(width=2,
                                           color='rgb(229, 151, 50)'))
    actual_trace = go.Scatter(x=list(range(point_count)),
                              y=y,
                              name='Y Actual',
                              line=dict(width=2,
                                        color='rgb(106, 181, 135)'))
    residuals = [y[k] - ycap[k] for k in range(point_count)]
    residual_trace = go.Scatter(x=list(range(point_count)),
                                y=residuals,
                                line=dict(width=2, color='rgb(236, 10, 15)'))

    fig1 = go.Figure(data=[predicted_trace, actual_trace],
                     layout=y_ycap_title)
    fig2 = go.Figure(data=[residual_trace], layout=error_title)
    rounded_mean = round(db.get('lr.error_mean'), 4)
    error_heading = html.H2('Error Mean = ' + str(rounded_mean))

    ##Team 5 API Integration
    anova = get_anova(y, ycap, len(params))
    db.put('lr.anova', anova)
    anova_div = common.get_anova_div(anova)

    status = common.success_msg(
        "Linear Regression API Exceuted Successfully!!")
    return (status, table1, table2, fig1, fig2, error_heading, anova_div)
Example #14
0
def hor_model_name_input(value):
    """Remember the entered model name in ``db`` under ``"hor.model_name"``.

    Args:
        value: Name to store; ``None`` leaves the stored value as is.

    Returns:
        Always ``None``.
    """
    if value is None:
        return None
    db.put("hor.model_name", value)
    return None
Example #15
0
def hor_predict_input(value):
    """Remember the prediction input in ``db`` under ``"hor.predict_data"``.

    Args:
        value: Input to store; ``None`` leaves the stored value as is.

    Returns:
        Always ``None``.
    """
    if value is None:
        return None
    db.put("hor.predict_data", value)
    return None
Example #16
0
def file_header_true(value):
    """Translate the header choice into a boolean stored in ``db``.

    A value equal to ``1`` stores ``True`` under ``"file_header"``, a value
    equal to ``0`` stores ``False``; anything else stores nothing.

    Args:
        value: The selected option value.

    Returns:
        Always ``None``.
    """
    # Checked in order: 1 first, then 0; the first match wins.
    for option, has_header in ((1, True), (0, False)):
        if value == option:
            db.put("file_header", has_header)
            break
    return None
Example #17
0
def linear_regression_file_value(value):
    """Switch the linear-regression page to a newly selected file.

    When a file is given, ``db.clear('lr.')`` is called first and the file
    is then stored under ``'lr.file'``.

    Args:
        value: Selected file; ``None`` leaves ``db`` untouched.

    Returns:
        Always ``None``.
    """
    if value is None:
        return None
    db.clear('lr.')
    db.put('lr.file', value)
    return None
Example #18
0
def stats_table_and_linear_regression(json_ordered_data):
    """Fit a linear regression and build the result components.

    The last column of the data is the dependent variable; every other
    column is a predictor. With exactly one predictor the rows are sorted
    by it and the plots use its values on the x axis; otherwise the plots
    use the position of each data point. Intermediate results are written
    to ``db`` under ``lr.*`` keys (model, x_col, y_col, summary, params,
    ycap, error_mean, anova).

    Args:
        json_ordered_data: Frame serialised as JSON with ``orient='split'``,
            or ``None`` when no data is available yet.

    Returns:
        An 8-tuple: status message, statistics table, coefficient table,
        actual-vs-predicted figure, error figure, error-mean heading, ANOVA
        component and a heading listing the predictor names. When the input
        is missing or fitting fails, the tables are empty, the figures are
        the module-level ``y_ycap_fig`` / ``error_fig`` and the last three
        items are ``""``.
    """
    if json_ordered_data is None:
        return (common.msg(None), generate_table(pd.DataFrame(columns=[])),
                generate_table(pd.DataFrame(columns=[])), y_ycap_fig,
                error_fig, "", "", "")

    frame = pd.read_json(json_ordered_data, orient='split')
    columns = list(frame.columns)
    x_col = columns[:-1]
    y_col = columns[-1]

    if len(x_col) == 1:
        # A single predictor is plotted directly, so order the rows by it.
        frame = frame.sort_values(by=x_col)

    # One list of values per predictor column.
    data = [frame[name].values.tolist() for name in x_col]
    y = list(frame[columns[-1]])

    ##Team 3 API Integration
    try:
        model = LinearRegression()
        db.put("lr.model", model)
        db.put("lr.x_col", x_col)
        db.put("lr.y_col", y_col)
        (summary, params, ycap) = model.fit(data, y)
        db.put("lr.summary", summary)
        db.put("lr.params", params)
        db.put("lr.ycap", ycap)
        db.put("lr.error_mean", model.model_stats()['mean'])
    except Exception as e:
        failure = common.error_msg("Linear Regression API Error: " + str(e))
        return (failure,
                generate_table(pd.DataFrame(columns=[])),
                generate_table(pd.DataFrame(columns=[])), y_ycap_fig,
                error_fig, "", "", "")

    table_options = dict(striped=True,
                         bordered=True,
                         hover=True,
                         style=common.table_style)
    table1 = dbc.Table.from_dataframe(
        common.get_stats_df(summary, x_col, y_col), **table_options)
    table2 = dbc.Table.from_dataframe(common.get_coeff_df(params, x_col),
                                      **table_options)

    # The actual values are drawn as markers against the single predictor,
    # or as a line against the data-point sequence otherwise.
    if len(data) == 1:
        trace_x = data[0]
        x_title = "x (" + str(x_col[0]) + ")"
        actual_look = dict(mode='markers',
                           marker=dict(color='rgb(106, 181, 135)'))
    else:
        trace_x = list(range(len(y)))
        x_title = 'Sequence of data points'
        actual_look = dict(line=dict(width=2, color='rgb(106, 181, 135)'))
    actual_trace = go.Scatter(x=trace_x, y=y, name='Y Actual', **actual_look)

    predicted_trace = go.Scatter(x=trace_x,
                                 y=ycap,
                                 name='Y Predicted (ŷ)',
                                 line=dict(width=2,
                                           color='rgb(229, 151, 50)'))

    residuals = [y[k] - ycap[k] for k in range(len(y))]
    residual_trace = go.Scatter(x=trace_x,
                                y=residuals,
                                line=dict(width=2, color='rgb(236, 10, 15)'))

    y_title = "y,ŷ(" + str(y_col) + ")"
    fit_layout = go.Layout(title='Actual vs Predicted Y Plot',
                           hovermode='closest',
                           xaxis={'title': x_title},
                           yaxis={'title': y_title})
    residual_layout = go.Layout(title='Error Plot',
                                hovermode='closest',
                                xaxis={'title': x_title},
                                yaxis={'title': 'Error = y - ŷ'})

    fig1 = go.Figure(data=[predicted_trace, actual_trace], layout=fit_layout)
    fig2 = go.Figure(data=[residual_trace], layout=residual_layout)
    rounded_mean = round(db.get('lr.error_mean'), 4)
    error_heading = html.H2('Error Mean = ' + str(rounded_mean))

    ##Team 5 API Integration
    anova = get_anova(y, ycap, len(params))
    db.put('lr.anova', anova)
    anova_div = common.get_anova_div(anova)

    predictor_names = ','.join(x_col)
    status = common.success_msg(
        "Linear Regression API Exceuted Successfully!!")
    return (status, table1, table2, fig1, fig2, error_heading, anova_div,
            html.H2(predictor_names))