def rc_compare(n_clicks):
    """Render the detail view for the currently selected model.

    Reads the selected model key (``rc.model``) and the ``models`` entry
    from ``db``. When either is missing an error message component is
    returned; otherwise the result of ``get_model_div`` for the selected
    key is returned.

    ``n_clicks`` is accepted but not used in the body.
    """
    selected_key = db.get("rc.model")
    trained_models = db.get('models')

    # The selection check takes precedence over the "nothing trained" check.
    if selected_key is None:
        result = common.error_msg("Select Model for Details!!")
    elif trained_models is None:
        result = common.error_msg(
            "No Model has been trainned, Please Train Models First!!")
    else:
        result = get_model_div(selected_key)
    return result
Exemple #2
0
def rc_compare(n_clicks):
    """Render the model comparison view for the currently selected file.

    Reads the selected file (``rc.file``) and the ``models`` entry from
    ``db``. When either is missing an error message component is returned;
    otherwise the comparison div from ``get_compare_div`` is wrapped in an
    ``html.Div`` followed by a line break and a horizontal rule.

    ``n_clicks`` is accepted but not used in the body.
    """
    selected_file = db.get("rc.file")
    trained_models = db.get('models')

    # The selection check takes precedence over the "nothing trained" check.
    if selected_file is None:
        result = common.error_msg("Select Models for Comparison!!")
    elif trained_models is None:
        result = common.error_msg(
            "No Model has been trainned, Please Train Models First!!")
    else:
        comparison = get_compare_div(selected_file)
        result = html.Div([comparison, html.Br(), html.Hr()])
    return result
Exemple #3
0
def get_compare_div(file):
    """Build a table comparing every stored model whose ``'file'`` is ``file``.

    The table has one column per matching model key and one row each for
    tag, type, number of coefficients, F statistic, coefficient of
    determination and error mean; the row values come from
    ``get_property``.

    Returns an ``html.Div`` wrapping the bootstrap table, or the result of
    ``common.error_msg`` when no stored model matches ``file``.
    """
    # Treat a missing 'models' entry like an empty one so the caller gets
    # the "no model trained" message instead of an AttributeError.
    models = db.get('models') or {}
    keys = [key for key, value in models.items() if value['file'] == file]
    if not keys:
        return common.error_msg("No Model has been trainned for " + file +
                                ". Please Train Models First!!")

    def rounded(values):
        # The original called ``df.round(4)`` and discarded the result.
        # Even if assigned it would not have helped: every column mixes
        # strings and numbers (object dtype), which DataFrame.round leaves
        # untouched. Float cells are therefore rounded before insertion.
        return [
            round(value, 4) if isinstance(value, float) else value
            for value in values
        ]

    tags = ['Tag ' + str(tag) for tag in get_property(keys, 'tag')]
    df = pd.DataFrame(columns=['Model Name'] + keys)
    df.loc[0] = ['Tag :'] + tags
    df.loc[1] = ['Type :'] + get_property(keys, 'type')
    df.loc[2] = ['No of Coefficients :'] + get_property(keys, 'params')
    df.loc[3] = ['F Statistics :'] + rounded(
        get_property(keys, 'anova', 'F'))
    df.loc[4] = ['Cofficient of Determination :'] + rounded(
        get_property(keys, 'anova', 'R2'))
    df.loc[5] = ['Error Mean :'] + rounded(get_property(keys, 'error_mean'))
    div = html.Div([
        dbc.Table.from_dataframe(df,
                                 striped=True,
                                 bordered=True,
                                 hover=True,
                                 style=common.table_style)
    ])
    return div
def lr_predict_data(n_clicks):
    """Predict the dependent variable for the user-supplied inputs.

    Reads the raw prediction input from ``db`` (``cl.predict_data``),
    converts it with ``get_predict_data_list`` and feeds it to the stored
    linear regression model (``lr.model``).

    Returns ``""`` when no prediction input is stored, an error message
    component when no model has been fitted yet or when the number of
    inputs is not ``len(params) - 1``, and a success message component
    carrying the prediction otherwise.

    ``n_clicks`` is accepted but not used in the body.
    """
    # NOTE(review): the input is read from a 'cl.'-prefixed key while the
    # model uses 'lr.' keys -- confirm this is the intended key.
    predict_data = db.get('cl.predict_data')
    if predict_data is None:
        return ""
    predict_data = get_predict_data_list(predict_data)
    model = db.get("lr.model")
    params = db.get("lr.params")
    # Without a fitted model the original crashed on len(None) or on
    # None.predict; report it to the user instead.
    if model is None or params is None:
        return common.error_msg(
            "No Model has been trainned, Please Train Models First!!")
    if len(predict_data) != len(params) - 1:
        return common.error_msg('Pass Valid InDependent Variables!!')
    predicted = model.predict(predict_data)
    return common.success_msg('Predicted Dependent Variable = ' +
                              str(predicted))
Exemple #5
0
def clean_save_file(n):
    """Clean the stored raw data, save it as CSV and summarise the result.

    Reads ``raw_data``, ``file``, ``sheet`` and ``tags`` from ``db``. When
    ``n`` or the raw data is ``None`` nothing is done and ``None`` is
    returned. Otherwise the data is passed to ``data_cleaning``, the cleaned
    frame is written to the path given by ``FileUtils.path('clean', ...)``,
    the tag counter for the file is incremented (or set to 1), and a div
    with the cleaning statistics and the column schema is returned.

    Any exception raised along the way is turned into an error message
    component prefixed with "Data Cleansing API Error: ".
    """
    ## Team 2 API Integration
    raw_df = db.get("raw_data")
    file_name = db.get("file")
    sheet_name = db.get("sheet")
    tag_counts = db.get('tags')

    if n is None or raw_df is None:
        return None

    def as_table(frame):
        # Shared table styling for both summary tables.
        return dbc.Table.from_dataframe(frame,
                                        striped=True,
                                        bordered=True,
                                        hover=True,
                                        style=common.table_style)

    try:
        _, cleaned_df, _, stats = data_cleaning(raw_df)

        if sheet_name is not None:
            file_name = FileUtils.append_file_name(file_name, sheet_name)
        # Keep only the part of the name before the first dot.
        file_name = file_name.split('.')[0]
        cleaned_df.to_csv(FileUtils.path('clean', file_name), index=False)

        ### Tag the cleaned data ###
        tag_counts[file_name] = (tag_counts[file_name] +
                                 1 if file_name in tag_counts else 1)

        schema_df = pd.DataFrame(columns=stats['col_name'])
        schema_df.loc[0] = stats['col_type']

        summary_columns = [
            'Tag', 'Total no of Records', 'Cleaned no of Records',
            'Defective no of Records'
        ]
        summary_df = pd.DataFrame(columns=summary_columns)
        summary_df.loc[0] = [
            'Tag ' + str(tag_counts[file_name]), stats['row_total'],
            stats['row_cleaned'], stats['row_defect']
        ]

        children = [
            common.success_msg("File is Cleaned & Saved Successfully!!"),
            html.H2('Cleaned Data Statistic'),
            as_table(summary_df),
            html.H2('Cleaned Data Schema'),
            as_table(schema_df),
        ]
        return html.Div(children, style={'margin': '10px'})
    except Exception as e:
        return common.error_msg("Data Cleansing API Error: " + str(e))
Exemple #6
0
def apply_file_properties(n):
    """Load the selected raw file with the chosen properties and preview it.

    Reads ``file``, ``format``, ``file_separator`` and ``file_header`` from
    ``db`` (and ``sheet`` for Excel formats). For ``csv``/``txt`` the file
    is read with ``DataUtils.read_csv`` (the separator defaults to ``','``
    and that default is written back to ``db``); for ``xls``/``xlsx`` it is
    read with ``DataUtils.read_xls``. The loaded frame is stored in ``db``
    under ``raw_data``.

    Returns ``None`` when no format is stored, an error message component
    when the header is not selected or the format is not one of the four
    supported ones, and otherwise a list holding a message, a table of the
    first 10 rows and a "Clean & Save" button.

    ``n`` is accepted but not used in the body.
    """
    file = db.get("file")
    file_format = db.get("format")  # local renamed: don't shadow builtin
    sep = db.get("file_separator")
    header = db.get("file_header")

    if file_format is None:
        return None
    # Any other format used to fall through every branch, leaving ``df`` as
    # None and ``msg`` unbound, and then crashed on ``df.head(10)``.
    if file_format not in ('csv', 'txt', 'xls', 'xlsx'):
        return common.error_msg('Unsupported File Format: ' +
                                str(file_format))
    if header is None:
        return common.error_msg('Please Select Header!!')

    path = FileUtils.path('raw', file)
    if file_format in ('csv', 'txt'):
        if sep is None:
            sep = ','
            db.put("file_separator", sep)
        df = DataUtils.read_csv(path, sep, header)
        msg = "Following Properties Applied. Separator=" + sep + " Header=" + str(
            header)
    else:
        sheet = db.get("sheet")
        df = DataUtils.read_xls(path, sheet, header)
        msg = "Following Properties Applied. Header=" + str(header)

    table = dbc.Table.from_dataframe(df.head(10),
                                     striped=True,
                                     bordered=True,
                                     hover=True,
                                     style=common.table_style)
    button = dbc.Button("Clean & Save", color="primary", id='clean-save-file')
    div = [
        common.msg(msg), table,
        html.Div(
            [button, html.Br(),
             html.Div([], id="cleaned-saved-file")],
            style={
                'padding': '10px',
                'textAlign': 'center'
            })
    ]
    db.put("raw_data", df)
    return div
def stats_table_and_linear_regression(json_ordered_data):
    """Fit a linear regression on the given data and build the outputs.

    ``json_ordered_data`` is a JSON string in pandas ``orient='split'``
    form; the last column is used as the dependent variable and all others
    as independent variables. The model, column names, summary, parameters,
    predictions, error mean and ANOVA result are stored in ``db`` under
    ``lr.*`` keys.

    Returns a 7-tuple: status message, statistics table, coefficient table,
    actual-vs-predicted figure, error figure, error-mean heading and ANOVA
    div. When the input is ``None`` or fitting raises, the tuple holds
    empty tables, the module-level placeholder figures and empty strings.
    """

    def placeholder_outputs(message):
        # Tuple returned whenever no fitted model is available.
        return (message, generate_table(pd.DataFrame(columns=[])),
                generate_table(pd.DataFrame(columns=[])), y_ycap_fig,
                error_fig, "", "")

    if json_ordered_data is None:
        return placeholder_outputs(common.msg(None))

    frame = pd.read_json(json_ordered_data, orient='split')
    columns = list(frame.columns)
    x_col, y_col = columns[:-1], columns[-1]
    y = list(frame[y_col])
    # One list of values per independent variable.
    data = [frame[name].values.tolist() for name in x_col]

    ##Team 3 API Integration
    try:
        model = LinearRegression()
        db.put("lr.model", model)
        db.put("lr.x_col", x_col)
        db.put("lr.y_col", y_col)
        summary, params, ycap = model.fit(data, y)
        db.put("lr.summary", summary)
        db.put("lr.params", params)
        db.put("lr.ycap", ycap)
        db.put("lr.error_mean", model.model_stats()['mean'])
    except Exception as e:
        return placeholder_outputs(
            common.error_msg("Linear Regression API Error: " + str(e)))

    def as_table(table_frame):
        # Shared table styling for both result tables.
        return dbc.Table.from_dataframe(table_frame,
                                        striped=True,
                                        bordered=True,
                                        hover=True,
                                        style=common.table_style)

    stats_table = as_table(common.get_stats_df(summary, x_col, y_col))
    coeff_table = as_table(common.get_coeff_df(params, x_col))

    point_count = len(y)
    predicted_trace = go.Scatter(x=list(range(point_count)),
                                 y=ycap,
                                 name='Y Predicted (ŷ)',
                                 line=dict(width=2,
                                           color='rgb(229, 151, 50)'))
    actual_trace = go.Scatter(x=list(range(point_count)),
                              y=y,
                              name='Y Actual',
                              line=dict(width=2, color='rgb(106, 181, 135)'))
    residuals = [y[idx] - ycap[idx] for idx in range(point_count)]
    residual_trace = go.Scatter(x=list(range(point_count)),
                                y=residuals,
                                line=dict(width=2, color='rgb(236, 10, 15)'))

    fit_figure = go.Figure(data=[predicted_trace, actual_trace],
                           layout=y_ycap_title)
    residual_figure = go.Figure(data=[residual_trace], layout=error_title)
    error_heading = html.H2('Error Mean = ' +
                            str(round(db.get('lr.error_mean'), 4)))

    ##Team 5 API Integration
    anova = get_anova(y, ycap, len(params))
    db.put('lr.anova', anova)
    anova_div = common.get_anova_div(anova)

    status = common.success_msg(
        "Linear Regression API Exceuted Successfully!!")
    return (status, stats_table, coeff_table, fit_figure, residual_figure,
            error_heading, anova_div)
Exemple #8
0
def stats_table_and_hor_regression(json_ordered_data, hor_order):
    """Fit a higher-order regression on the given data and build the outputs.

    ``json_ordered_data`` is a JSON string in pandas ``orient='split'``
    form; its first column is used as x and its second as y, after sorting
    the rows by x. ``hor_order`` is passed through to
    ``Building_model_equation``. Column names, parameters, predictions,
    order, error mean, summary and ANOVA result are stored in ``db`` under
    ``hor.*`` keys.

    Returns an 8-tuple: status message, statistics table, coefficient
    table, actual-vs-predicted figure, error figure, error-mean heading,
    ANOVA div and a heading naming the independent variable. When either
    argument is ``None`` or the fit raises, the tuple holds empty tables,
    the module-level placeholder figures and empty strings.
    """
    if json_ordered_data is None or hor_order is None:
        return (common.msg(None), generate_table(pd.DataFrame(columns=[])),
                generate_table(pd.DataFrame(columns=[])), y_ycap_fig,
                error_fig, "", "", "")
    dff = pd.read_json(json_ordered_data, orient='split')
    col = list(dff.columns)
    x_col = [col[0]]
    y_col = col[1]
    dff = dff.sort_values(by=x_col)
    x = list(dff[col[0]])
    y = list(dff[col[1]])

    ##Team 4 API Integration
    try:
        db.put("hor.x_col", x_col)
        db.put("hor.y_col", y_col)
        (ycap, params) = Building_model_equation(x, y, hor_order)
        params = params.tolist()
        db.put("hor.params", params)
        db.put("hor.ycap", ycap)
        db.put("hor.order", hor_order)
        # Residuals are computed once and reused for the error plot below.
        ydiff = [y[i] - ycap[i] for i in range(len(y))]
        # The accumulator used to start at 10.0, which shifted the reported
        # mean by 10 / len(y); a mean of residuals must start from zero.
        error_mean = sum(ydiff, 0.0) / len(y)
        db.put("hor.error_mean", error_mean)

        ## Team 3 API call for Summary Statistics
        model = LinearRegression()
        (summary, params_ignore, ycap_ignore) = model.fit([x], y)
        db.put("hor.summary", summary)
    except Exception as e:
        return (common.error_msg("Higher Order Regression API Error: " +
                                 str(e)),
                generate_table(pd.DataFrame(columns=[])),
                generate_table(pd.DataFrame(columns=[])), y_ycap_fig,
                error_fig, "", "", "")

    df_stats = common.get_stats_df(summary, x_col, y_col)
    table1 = dbc.Table.from_dataframe(df_stats,
                                      striped=True,
                                      bordered=True,
                                      hover=True,
                                      style=common.table_style)

    df_coeff = common.hor_get_coeff_df(params)
    table2 = dbc.Table.from_dataframe(df_coeff,
                                      striped=True,
                                      bordered=True,
                                      hover=True,
                                      style=common.table_style)

    trace_actual = go.Scatter(x=x,
                              y=y,
                              name='Y Actual',
                              mode='markers',
                              marker=dict(color='rgb(106, 181, 135)'))

    trace_predict = go.Scatter(x=x,
                               y=ycap,
                               name='Y Predicted (ŷ)',
                               line=dict(width=2, color='rgb(229, 151, 50)'))

    trace_error = go.Scatter(x=x,
                             y=ydiff,
                             line=dict(width=2, color='rgb(236, 10, 15)'))

    x_title = "x (" + str(x_col[0]) + ")"
    y_title = "y,ŷ(" + str(y_col) + ")"
    y_ycap_title = go.Layout(title='Actual vs Predicted Y Plot',
                             hovermode='closest',
                             xaxis={'title': x_title},
                             yaxis={'title': y_title})
    error_title = go.Layout(title='Error Plot',
                            hovermode='closest',
                            xaxis={'title': x_title},
                            yaxis={'title': 'Error = y - ŷ'})

    fig1 = go.Figure(data=[trace_predict, trace_actual], layout=y_ycap_title)
    fig2 = go.Figure(data=[trace_error], layout=error_title)
    error_mean = html.H2('Error Mean = ' +
                         str(round(db.get('hor.error_mean'), 4)))

    ##Team 5 API Integration
    anova = get_anova(y, ycap, len(params))
    db.put('hor.anova', anova)
    anova_div = common.get_anova_div(anova)

    independent_var = ','.join(x_col)

    return (common.success_msg(
        "Higher Order Regression API Exceuted Successfully!!"), table1, table2,
            fig1, fig2, error_mean, anova_div, html.H2(independent_var))
Exemple #9
0
def stats_table_and_linear_regression(json_ordered_data):
    """Fit a linear regression on the given data and build the outputs.

    ``json_ordered_data`` is a JSON string in pandas ``orient='split'``
    form; the last column is used as the dependent variable and all others
    as independent variables. With exactly one independent variable the
    rows are sorted by it and it is used as the plot x axis; otherwise the
    x axis is the sequence of data points. The model, column names,
    summary, parameters, predictions, error mean and ANOVA result are
    stored in ``db`` under ``lr.*`` keys.

    Returns an 8-tuple: status message, statistics table, coefficient
    table, actual-vs-predicted figure, error figure, error-mean heading,
    ANOVA div and a heading listing the independent variables. When the
    input is ``None`` or fitting raises, the tuple holds empty tables, the
    module-level placeholder figures and empty strings.
    """

    def placeholder_outputs(message):
        # Tuple returned whenever no fitted model is available.
        return (message, generate_table(pd.DataFrame(columns=[])),
                generate_table(pd.DataFrame(columns=[])), y_ycap_fig,
                error_fig, "", "", "")

    if json_ordered_data is None:
        return placeholder_outputs(common.msg(None))

    frame = pd.read_json(json_ordered_data, orient='split')
    columns = list(frame.columns)
    x_col, y_col = columns[:-1], columns[-1]

    if len(x_col) == 1:
        frame = frame.sort_values(by=x_col)

    # One list of values per independent variable.
    data = [frame[name].values.tolist() for name in x_col]
    y = list(frame[y_col])

    ##Team 3 API Integration
    try:
        model = LinearRegression()
        db.put("lr.model", model)
        db.put("lr.x_col", x_col)
        db.put("lr.y_col", y_col)
        summary, params, ycap = model.fit(data, y)
        db.put("lr.summary", summary)
        db.put("lr.params", params)
        db.put("lr.ycap", ycap)
        db.put("lr.error_mean", model.model_stats()['mean'])
    except Exception as e:
        return placeholder_outputs(
            common.error_msg("Linear Regression API Error: " + str(e)))

    def as_table(table_frame):
        # Shared table styling for both result tables.
        return dbc.Table.from_dataframe(table_frame,
                                        striped=True,
                                        bordered=True,
                                        hover=True,
                                        style=common.table_style)

    stats_table = as_table(common.get_stats_df(summary, x_col, y_col))
    coeff_table = as_table(common.get_coeff_df(params, x_col))

    single_variable = len(data) == 1
    if single_variable:
        trace_x = data[0]
        x_title = "x (" + str(x_col[0]) + ")"
        actual_style = dict(mode='markers',
                            marker=dict(color='rgb(106, 181, 135)'))
    else:
        trace_x = list(range(len(y)))
        x_title = 'Sequence of data points'
        actual_style = dict(line=dict(width=2, color='rgb(106, 181, 135)'))
    actual_trace = go.Scatter(x=trace_x, y=y, name='Y Actual', **actual_style)

    predicted_trace = go.Scatter(x=trace_x,
                                 y=ycap,
                                 name='Y Predicted (ŷ)',
                                 line=dict(width=2,
                                           color='rgb(229, 151, 50)'))

    residuals = [y[idx] - ycap[idx] for idx in range(len(y))]
    residual_trace = go.Scatter(x=trace_x,
                                y=residuals,
                                line=dict(width=2, color='rgb(236, 10, 15)'))

    y_title = "y,ŷ(" + str(y_col) + ")"
    fit_layout = go.Layout(title='Actual vs Predicted Y Plot',
                           hovermode='closest',
                           xaxis={'title': x_title},
                           yaxis={'title': y_title})
    residual_layout = go.Layout(title='Error Plot',
                                hovermode='closest',
                                xaxis={'title': x_title},
                                yaxis={'title': 'Error = y - ŷ'})

    fit_figure = go.Figure(data=[predicted_trace, actual_trace],
                           layout=fit_layout)
    residual_figure = go.Figure(data=[residual_trace],
                                layout=residual_layout)
    error_heading = html.H2('Error Mean = ' +
                            str(round(db.get('lr.error_mean'), 4)))

    ##Team 5 API Integration
    anova = get_anova(y, ycap, len(params))
    db.put('lr.anova', anova)
    anova_div = common.get_anova_div(anova)

    variables_heading_text = ','.join(x_col)

    status = common.success_msg(
        "Linear Regression API Exceuted Successfully!!")
    return (status, stats_table, coeff_table, fit_figure, residual_figure,
            error_heading, anova_div, html.H2(variables_heading_text))