def apply_file_properties(n):
    """Load the selected raw file with the stored separator/header settings.

    Reads "file", "format", "file_separator" and "file_header" from ``db``.
    For csv/txt files the data is read via ``DataUtils.read_csv``, stored
    under "data", and a preview of the first ten rows is built.

    Args:
        n: Not referenced inside the function body.

    Returns:
        ``None`` when the format is unset or is not csv/txt, an error
        message component when no header is stored, otherwise a list holding
        a status message and the preview table.
    """
    file_name = db.get("file")
    file_format = db.get("format")
    separator = db.get("file_separator")
    header = db.get("file_header")

    if file_format not in ('csv', 'txt'):
        return None
    if header is None:
        return common.error_msg('Please Select Header!!')

    if separator is None:
        # No separator chosen: fall back to a comma and remember it.
        separator = ','
        db.put("file_separator", separator)

    frame = DataUtils.read_csv(FileUtils.path('raw', file_name), separator, header)
    db.put("data", frame)
    status = ("Following Properties Applied. Separator=" + separator
              + " Header=" + str(header))
    preview = dbc.Table.from_dataframe(
        frame.head(10),
        striped=True,
        bordered=True,
        hover=True,
        style=common.table_style,
    )
    return [common.msg(status), preview]
def display_data(value):
    """Render a preview of the selected file and record the selection.

    Falls back to the file name stored under "file" in ``db`` when ``value``
    is ``None``; calls ``db.reset()`` when a different file is selected.
    The file name and its format are stored under "file" and "format".

    Returns:
        ``""`` when nothing is selected or stored, a list of components for
        csv/txt and jpeg/jpg/gif files, otherwise a "not supported" string.
    """
    stored = db.get("file")
    if value is None:
        if stored is None:
            return ""
        value = stored
    elif not (value == stored):
        # A different file was picked: discard everything stored so far.
        db.reset()

    file_format = FileUtils.file_format(value)
    if file_format in ('csv', 'txt'):
        head = DataUtils.read_text_head(FileUtils.path('raw', value))
        column_widths = [
            html.Col(style = {'width':"10%"}),
            html.Col(style = {'width':"90%"}),
        ]
        heading = [html.Thead(html.Tr([html.Th("Row No"), html.Th("Data")]))]
        # Row numbers shown to the user start at 1.
        body_rows = [
            html.Tr([html.Td(idx + 1), html.Td(head[idx])])
            for idx in range(len(head))
        ]
        table = dbc.Table(
            column_widths + heading + [html.Tbody(body_rows)],
            bordered=True,
            style = common.table_style,
        )
        div = [
            common.msg("Selected File: " + value),
            common.msg("Selected Format: " + file_format),
            table,
            html.Br(),
            csv_properties_div,
        ]
    elif file_format in ('jpeg', 'jpg', 'gif'):
        div = [
            common.msg("Selected File: " + value),
            common.msg("Selected Format: " + file_format),
        ]
    else:
        div = "Format Not Supported!!"

    db.put("file", value)
    db.put("format", file_format)
    return div
def sgd_model_predict(n_clicks):
    """Classify one user-supplied feature vector with the trained SGD network.

    Reads the model variables, the comma separated prediction data, the
    model and its ``yu`` value from ``db``. The raw prediction is mapped back
    through 'sgd.reverse_quantized_classes' and stored under
    'sgd.prediction'.

    Args:
        n_clicks: Not referenced inside the function body.

    Returns:
        ``("", "")`` when no prediction data or no model variables are
        stored, ``(error message, "")`` for malformed input or a failed
        prediction, otherwise the success message component.
    """
    var = db.get('sgd.model_variables')
    predict_data = db.get("sgd.model_prediction_data")
    model = db.get('sgd.model')
    yu = db.get('sgd.model_yu')

    # Without variables there is nothing to validate against; previously
    # len(None) raised a TypeError here.
    if predict_data is None or var is None:
        return ("", "")
    if len(predict_data.split(',')) != len(var):
        return (common.error_msg('Enter Valid Prediction Data!!'), "")

    try:
        layer = db.get("sgd.model_layer")
        feature_vector = get_predict_data_list(predict_data)
        if layer == 1:
            df = pd.DataFrame(columns=var)
            df.loc[0] = feature_vector
            prediction = ann_predict(df, model, yu)
        elif layer == 2:
            prediction = ann_predict_h2(feature_vector, model, yu)
        else:
            # Report the real cause instead of an unbound-local error.
            raise ValueError("Unsupported number of hidden layers: " + str(layer))
        reverse_quantized_classes = db.get('sgd.reverse_quantized_classes')
        prediction = reverse_quantized_classes[int(prediction)]
        db.put('sgd.prediction', prediction)
    except Exception as e:
        traceback.print_exc()
        return (common.error_msg("Exception during prediction: " + str(e)), "")

    # NOTE(review): the early returns are 2-tuples while this is a single
    # component; confirm against the callback's declared outputs.
    # str() keeps the message valid when the class label is not a string.
    return common.success_msg('Predicted/Classified Class = ' + str(prediction))
def knn_model_predict(n_clicks):
    """Classify one user-supplied feature vector with k-nearest-neighbours.

    Reads the class column, variables, ``k`` and the training frame from
    ``db``, predicts through ``knn_predict`` and stores the result under
    'knn.prediction'.

    Args:
        n_clicks: Not referenced inside the function body.

    Returns:
        ``("", "")`` when no prediction data is stored,
        ``(error message, "")`` for malformed input or a failed prediction,
        otherwise ``(success message, scatter-plot controls)``.
    """
    c = db.get('knn.model_class')
    predict_data = db.get("knn.model_prediction_data")
    var = db.get('knn.model_variables')
    n_var = len(var)
    k = db.get('knn.distance')
    train_df = db.get('knn.data_train')

    if predict_data is None:
        return ("" , "")
    if len(predict_data.split(',')) != n_var:
        return (common.error_msg('Enter Valid Prediction Data!!'), "")

    try:
        cols = var + [c]
        train_dataset = train_df[cols].astype(str).values.tolist()
        query_row = get_predict_data_list(predict_data)
        # Empty placeholder in the class position of the query row.
        query_row.append('')
        result = knn_predict(train_dataset, [query_row], k)
        prediction = result[0][-1]
        print(prediction)
        db.put('knn.prediction', prediction)
    except Exception as e:
        traceback.print_exc()
        return (common.error_msg("Exception during prediction: " + str(e)), "")

    # Every column except the last one is offered as a plot axis.
    df = db.get('knn.data_train')
    df = df.iloc[:, :-1]

    def axis_dropdown(component_id):
        # One dropdown listing the remaining columns of the training frame.
        return dcc.Dropdown(
            id = component_id,
            options=[{'label':col, 'value':col} for col in list(df)],
            value=None,
            multi=False
        )

    heading = html.Div(
        [html.H2("Predicted & Training Data Set Scatter Plot")],
        style={'width': '100%', 'display': 'flex', 'align-items': 'center', 'justify-content': 'center'},
    )
    controls = dbc.Col([
        dbc.Label("Select X Axis"),
        axis_dropdown('knn-x-axis-predict'),
        html.Br(),
        dbc.Label("Select Y Axis"),
        axis_dropdown('knn-y-axis-predict'),
        html.Br(),
        dbc.Button("Plot", color="primary", id = 'knn-predict-scatter-plot-button'),
        html.Div([], id = "knn-x-axis-predict-do-nothing"),
        html.Div([], id = "knn-y-axis-predict-do-nothing")
    ], md=2, style = {'margin': '10px', 'font-size': '16px'})
    plot_area = dbc.Col([], md=9, id="knn-scatter-plot-predict")

    div = html.Div([
        heading,
        dbc.Row([controls, plot_area]),
    ])
    return (common.success_msg('Predicted/Classified Class = ' + prediction), div)
def dtn_model_train(n_clicks):
    """Train the decision tree on the banknote data and summarise the result.

    Reads 'dtn.model_class', 'dtn.model_variables', 'dtn.max_depth' and
    'dtn.min_size' from ``db``; training uses 5 folds on the file at
    ``FileUtils.path('extra', 'banknote.csv')``. The summary and the tree
    are stored under 'dtn.model_summary' and 'dtn.model_instance'.

    Args:
        n_clicks: Not referenced inside the function body.

    Returns:
        ``""`` while any parameter is missing, an error message component
        when training raises, otherwise a ``html.Div`` with the summary
        table and the tree's string form.
    """
    c = db.get('dtn.model_class')
    var = db.get('dtn.model_variables')
    max_depth = db.get('dtn.max_depth')
    min_size = db.get('dtn.min_size')
    folds = 5

    # Any missing parameter yields an empty result. The former trailing
    # ``else`` ('Select Proper Model Parameters!!') could never run because
    # this test already covers every missing-parameter combination.
    if c is None or var is None or max_depth is None or min_size is None:
        return ""

    try:
        path = FileUtils.path('extra', 'banknote.csv')
        tree, avg_score, avg_f1_score = train(path, max_depth, min_size, folds)
        summary = {
            'Max Depth': max_depth,
            'Min Size': min_size,
            'Folds': folds,
            'Average Score': round(avg_score, 4),
            'Average F1 Score': round(avg_f1_score, 4),
        }
        summary_df = pd.DataFrame(summary.items(), columns=['Parameters', 'Value'])
        db.put('dtn.model_summary', summary)
        db.put('dtn.model_instance', tree)
    except Exception as e:
        traceback.print_exc()
        return common.error_msg("Exception during training model: " + str(e))

    return html.Div([
        html.H2('Model Parameters & Summary:'),
        dbc.Table.from_dataframe(summary_df, striped=True, bordered=True, hover=True, style = common.table_style),
        html.Br(),
        html.H2('Tree'),
        html.H2(str(tree)),
    ])
def dt_model_predict(n_clicks):
    """Classify one user-supplied feature vector with the stored tree model.

    Reads the variables, prediction data and model instance from ``db`` and
    stores the stringified prediction under 'dt.prediction'.

    Args:
        n_clicks: Not referenced inside the function body.

    Returns:
        ``("", "")`` when no prediction data is stored,
        ``(error message, "")`` for malformed input or a failed prediction,
        otherwise the success message component.
    """
    var = db.get('dt.model_variables')
    predict_data = db.get("dt.model_prediction_data")
    model = db.get('dt.model_instance')
    expected_count = len(var)

    if predict_data is None:
        return ("" , "")
    if len(predict_data.split(',')) != expected_count:
        return (common.error_msg('Enter Valid Prediction Data!!'), "")

    try:
        row = get_predict_data_list(predict_data)
        # -1 is appended as a trailing placeholder before predicting.
        row.append(-1)
        predicted = model.predict([row])
        print(predicted)
        prediction = str(predicted[0])
        db.put('dt.prediction', prediction)
    except Exception as e:
        traceback.print_exc()
        return (common.error_msg("Exception during prediction: " + str(e)), "")

    return common.success_msg('Predicted/Classified Class = ' + prediction)
def dt_display_selected_file_scatter_plot(value):
    """Load the selected cleaned file and build the decision-tree page body.

    Falls back to the name stored under "dt.file" when ``value`` is
    ``None``. The file name and the loaded frame are stored under "dt.file"
    and "dt.data".

    Returns:
        A prompt message when nothing is selected or stored, otherwise a
        ``html.Div`` with a ten-row preview, the model-properties section and
        an empty container with id "dt-trained-model".
    """
    stored = db.get("dt.file")
    if value is None:
        if stored is None:
            return common.msg("Please select a cleaned file to proceed!!")
        value = stored

    db.put("dt.file", value)
    frame = DataUtils.read_csv(FileUtils.path('clean', value))
    db.put("dt.data", frame)

    return html.Div([
        common.msg("Selected cleaned file: "+ value),
        dbc.Table.from_dataframe(
            frame.head(10).astype(str),
            striped=True,
            bordered=True,
            hover=True,
            style = common.table_style,
        ),
        html.Br(),
        get_dt_model_properties_div(frame),
        html.Div([], id = "dt-trained-model", style = {'margin': '10px'}),
    ])
def dtn_display_selected_file_scatter_plot(value):
    """Build the banknote decision-tree page body.

    The incoming ``value`` is ignored: the file name is always "banknote".
    The cleaned file is loaded, re-saved without index or header to the
    'extra' location, and the frame, class column and variable list are
    stored in ``db``. A depth-versus-score plot is drawn from the
    'dt_banknote_call1.csv' file in the 'nets' location.

    Returns:
        A ``html.Div`` with the data preview, the score table and plot, the
        model-properties section and a loading wrapper around the
        "dtn-trained-model" container.
    """
    file_name = "banknote"
    db.put("dtn.file", file_name)

    frame = DataUtils.read_csv(FileUtils.path('clean', file_name))
    frame.to_csv(FileUtils.path('extra', 'banknote.csv'), index=False, header = False)
    db.put("dtn.data", frame)
    db.put('dtn.model_class', 'class')
    db.put('dtn.model_variables', ['variance','skewness','curtosis','entropy'])

    scores = DataUtils.read_csv(FileUtils.path('nets', 'dt_banknote_call1.csv'))
    train_trace = go.Scatter(x = scores['max_depth'], y = scores['avg_train_score'], name = 'Average Train Score')
    test_trace = go.Scatter(x = scores['max_depth'], y = scores['avg_test_score'], name = 'Average Test Score')
    layout = go.Layout(
        title = 'Depth of Tree Vs Performance Plot',
        hovermode = 'closest',
        xaxis={'title': 'Depth of Tree'},
        yaxis={'title': 'Performance'},
    )
    fig = go.Figure(data = [train_trace, test_trace], layout = layout)

    return html.Div([
        common.msg("Selected cleaned file: "+ file_name),
        dbc.Table.from_dataframe(frame.head(10).round(5).astype(str), striped=True, bordered=True, hover=True, style = common.table_style),
        html.Br(),
        html.H2('Using Default parameters for both max_depth and min_size.'),
        html.H2('Max Depth = 2 to 15'),
        html.H2('Min Size = 10'),
        dbc.Table.from_dataframe(scores.round(4), striped=True, bordered=True, hover=True, style = common.table_style),
        html.Br(),
        dcc.Graph(id='dtn-plot', figure=fig),
        html.Br(),
        get_dtn_model_properties_div(frame),
        dcc.Loading(
            id="dtn-model-training",
            children=[html.Div([], id = "dtn-trained-model", style = {'margin': '10px'})],
            type="default",
        ),
    ])
def knn_model_prediction_data(value):
    """Store ``value`` under "knn.model_prediction_data" in ``db``.

    A ``None`` value is ignored. Always returns ``None``.
    """
    if value is None:
        return None
    db.put("knn.model_prediction_data", value)
    return None
def knn_model_train(n_clicks):
    """Split the data, evaluate k-nearest-neighbours and build the result view.

    Reads the class column, variables, training percentage and ``k`` from
    ``db``. The train/test frames and the summary are stored under
    'knn.data_train', 'knn.data_test' and 'knn.model_summary'.

    Args:
        n_clicks: Not referenced inside the function body.

    Returns:
        ``""`` when no parameter is set at all, an error message component
        for an invalid training percentage, incomplete parameters or a
        failure during training, otherwise a ``html.Div`` with the class
        counts, summary, confusion matrix and prediction inputs.
    """
    c = db.get('knn.model_class')
    var = db.get('knn.model_variables')
    train = db.get('knn.model_train')
    k = db.get('knn.distance')
    selected_file = db.get("knn.file")  # read but not used below

    params = (c, var, train, k)
    if all(p is None for p in params):
        return ""
    if train is None or train < 0 or train > 100:
        return common.error_msg('Training % should be between 0 - 100 !!')
    if any(p is None for p in params):
        return common.error_msg('Select Proper Model Parameters!!')

    try:
        cols = var + [c]
        df = db.get('knn.data')[cols]
        train_df, test_df = common.split_df(df, c, train)

        # Per-class row counts for the full, training and testing frames,
        # combined into one table keyed on 'Class'.
        total_counts = get_distinct_count_df(df, c, 'Total Count')
        train_counts = get_distinct_count_df(train_df, c, 'Training Count')
        test_counts = get_distinct_count_df(test_df, c, 'Testing Count')
        distinct_count_df = (
            total_counts
            .join(train_counts.set_index('Class'), on='Class')
            .join(test_counts.set_index('Class'), on='Class')
        )

        train_dataset = train_df[cols].astype(str).values.tolist()
        test_dataset = test_df[cols].astype(str).values.tolist()
        result = knn_predict(train_dataset, test_dataset, k)
        cc_percentage = calculate_predict_accuracy(result)

        summary = {
            'Total Training Data': len(train_df),
            'Total Testing Data': len(test_df),
            'Total Number of Features in Dataset': len(var),
            'Model Accuracy %': round(cc_percentage, 2),
            'Features': str(var),
        }
        summary_df = pd.DataFrame(summary.items(), columns=['Parameters', 'Value'])

        db.put('knn.data_train', train_df)
        db.put('knn.data_test', test_df)
        db.put('knn.model_summary', summary)

        confusion_df = get_confusion_matrix(result, df[c].unique())
    except Exception as e:
        traceback.print_exc()
        return common.error_msg("Exception during training model: " + str(e))

    feature_hint = ','.join(var)
    return html.Div([
        html.H2('Class Grouping in Data:'),
        dbc.Table.from_dataframe(distinct_count_df, striped=True, bordered=True, hover=True, style = common.table_style),
        html.H2('Model Parameters & Summary:'),
        dbc.Table.from_dataframe(summary_df, striped=True, bordered=True, hover=True, style = common.table_style),
        html.H2('Confusion Matrix (Precision & Recall):'),
        dbc.Table.from_dataframe(confusion_df, striped=True, bordered=True, hover=True, style = common.table_style),
        html.H2('Prediction/Classification:'),
        html.P('Features to be Predicted (comma separated): ' + feature_hint, style = {'font-size': '16px'}),
        dbc.Input(id="knn-prediction-data", placeholder=feature_hint, type="text"),
        html.Br(),
        dbc.Button("Predict", color="primary", id = 'knn-predict'),
        html.Div([], id = "knn-prediction"),
        html.Div([],id = "knn-predicted-scatter-plot")
    ])
def knn_model_lr(value):
    """Store ``value`` under "knn.distance" in ``db``.

    A ``None`` value is ignored. Always returns ``None``.
    """
    if value is None:
        return None
    db.put("knn.distance", value)
    return None
def knn_model_train(value):
    """Store ``value`` under "knn.model_train" in ``db``.

    A ``None`` value is ignored. Always returns ``None``.
    """
    if value is None:
        return None
    db.put("knn.model_train", value)
    return None
def sgd_model_train(value):
    """Store ``value`` under "sgd.model_train" in ``db``.

    A ``None`` value is ignored. Always returns ``None``.
    """
    if value is None:
        return None
    db.put("sgd.model_train", value)
    return None
def nlcl_model_class(value):
    """Store ``value`` under "nlcl.model_class" in ``db``.

    A ``None`` value is ignored. Always returns ``None``.
    """
    if value is None:
        return None
    db.put("nlcl.model_class", value)
    return None
def nlcl_model_prediction_data(value):
    """Store ``value`` under "nlcl.model_prediction_data" in ``db``.

    A ``None`` value is ignored. Always returns ``None``.
    """
    if value is None:
        return None
    db.put("nlcl.model_prediction_data", value)
    return None
def nlcl_model_train(value):
    """Store ``value`` under "nlcl.model_train" in ``db``.

    A ``None`` value is ignored. Always returns ``None``.
    """
    if value is None:
        return None
    db.put("nlcl.model_train", value)
    return None
def sgd_model_prediction_data(value):
    """Store ``value`` under "sgd.model_prediction_data" in ``db``.

    A ``None`` value is ignored. Always returns ``None``.
    """
    if value is None:
        return None
    db.put("sgd.model_prediction_data", value)
    return None
def sgd_model_train(n_clicks):
    """Train the SGD neural network and build the result view.

    Reads the class column, variables, training percentage, learning rate,
    epoch count and hidden-layer sizes from ``db``. Two hidden layers are
    used when 'sgd.no_of_neuron_h2' is set, otherwise one; the layer count
    is stored under "sgd.model_layer" before any validation. The class
    column is quantized via ``quantized_class`` before the split.

    Args:
        n_clicks: Not referenced inside the function body.

    Returns:
        ``""`` when class, variables, training %, learning rate and epochs
        are all unset, an error message component for an invalid training
        percentage, incomplete parameters or a failure during training,
        otherwise a ``html.Div`` with class counts, summary, convergence
        plot and confusion matrix.
    """
    c = db.get('sgd.model_class')
    var = db.get('sgd.model_variables')
    train = db.get('sgd.model_train')
    lr = db.get('sgd.model_lr')
    epoch = db.get('sgd.model_epoch')
    no_of_neuron = db.get('sgd.no_of_neuron')
    no_of_neuron_h2 = db.get('sgd.no_of_neuron_h2')

    layer = 1 if no_of_neuron_h2 is None else 2
    db.put("sgd.model_layer", layer)

    required = (c, var, train, lr, epoch)
    if all(p is None for p in required):
        return ""
    if train is None or train < 0 or train > 100:
        return common.error_msg('Training % should be between 0 - 100 !!')
    if any(p is None for p in required):
        return common.error_msg('Select Proper Model Parameters!!')

    try:
        cols = [] + var
        cols.append(c)
        df = db.get('sgd.data')
        df = df[cols]

        # Make the DataFrame compatible with the SGD API.
        df, quantized_classes, reverse_quantized_classes = quantized_class(df, c)

        train_df, test_df = common.split_df(df, c, train)

        distinct_count_df_total = get_distinct_count_df(df, c, 'Total Count')
        distinct_count_df_train = get_distinct_count_df(train_df, c, 'Training Count')
        distinct_count_df_test = get_distinct_count_df(test_df, c, 'Testing Count')
        distinct_count_df = distinct_count_df_total.join(
            distinct_count_df_train.set_index('Class'), on='Class')
        distinct_count_df = distinct_count_df.join(
            distinct_count_df_test.set_index('Class'), on='Class')
        # Show the original class labels rather than the quantized ones.
        distinct_count_df['Class'] = distinct_count_df['Class'].map(
            reverse_quantized_classes)

        if layer == 1:
            ycap, loss_dict, cc_percentage, wc_percentage, model, yu = ann_training(
                train_df[var], train_df[c], no_of_neuron, lr, epoch)
            ycap, cc_percentage, wc_percentage = ann_testing(
                test_df[var], test_df[c], model, yu)
        elif layer == 2:
            ycap, loss_dict, cc_percentage, wc_percentage, model, yu = ann_training_h2(
                train_df[var], train_df[c], no_of_neuron, no_of_neuron_h2, lr, epoch)
            # Evaluate on the held-out test split, as the one-layer branch
            # does; this previously re-used the training split, so the
            # reported accuracy was training accuracy.
            ycap, cc_percentage, wc_percentage = ann_testing_h2(
                test_df[var], test_df[c], model, yu)

        summary = {}
        summary['Total Training Data'] = len(train_df)
        summary['Total Testing Data'] = len(test_df)
        summary['Total Number of Features in Dataset'] = len(var)
        summary['Total no of Layers'] = layer + 2
        summary['No of Hidden Layer'] = layer
        summary['No of Neuron in Hidden Layer 1'] = no_of_neuron
        summary['No of Neuron in Hidden Layer 2'] = no_of_neuron_h2
        summary['Activation Function'] = 'Sigmoid'
        summary['Learning rate'] = lr
        summary['Epochs'] = epoch
        summary['Model Accuracy'] = round(cc_percentage, 2)
        summary['Features'] = str(var)
        summary_df = pd.DataFrame(summary.items(), columns=['Parameters', 'Value'])

        db.put('sgd.data_train', train_df)
        db.put('sgd.data_test', test_df)
        db.put('sgd.quantized_classes', quantized_classes)
        db.put('sgd.reverse_quantized_classes', reverse_quantized_classes)
        db.put('sgd.model', model)
        db.put('sgd.model_yu', yu)
        db.put('sgd.summary', summary)
        # Also stored under the '<prefix>.model_summary' key that
        # sgd_model_predict reads and the knn/nlcl trainers write.
        db.put('sgd.model_summary', summary)

        confusion_df = get_confusion_matrix(
            test_df, c, var, model, yu, reverse_quantized_classes)
    except Exception as e:
        traceback.print_exc()
        return common.error_msg("Exception during training model: " + str(e))

    trace = go.Scatter(x=loss_dict['Epoch_no'],
                       y=loss_dict['Loss'],
                       line=dict(width=2, color='rgb(106, 181, 135)'))
    convergence_title = go.Layout(title='Convergence Plot',
                                  hovermode='closest',
                                  xaxis={'title': 'Epoch'},
                                  yaxis={'title': 'Loss Function'})
    convergence_fig = go.Figure(data=[trace], layout=convergence_title)

    return html.Div([
        html.H2('Class Grouping in Data:'),
        dbc.Table.from_dataframe(distinct_count_df, striped=True, bordered=True, hover=True, style=common.table_style),
        html.H2('Model Parameters & Summary:'),
        dbc.Table.from_dataframe(summary_df, striped=True, bordered=True, hover=True, style=common.table_style),
        html.Br(),
        dcc.Graph(id='sgd-convergence-plot', figure=convergence_fig),
        html.H2('Confusion Matrix (Precision & Recall):'),
        dbc.Table.from_dataframe(confusion_df, striped=True, bordered=True, hover=True, style=common.table_style),
        html.Br(),
        html.Br()
    ])
def sgd_model_epoch(value):
    """Store ``value`` under "sgd.model_epoch" in ``db``.

    A ``None`` value is ignored. Always returns ``None``.
    """
    if value is None:
        return None
    db.put("sgd.model_epoch", value)
    return None
def sgd_model_lr(value):
    """Store ``value`` under "sgd.model_lr" in ``db``.

    A ``None`` value is ignored. Always returns ``None``.
    """
    if value is None:
        return None
    db.put("sgd.model_lr", value)
    return None
def sgd_model_neuron_h2(value):
    """Store ``value`` under "sgd.no_of_neuron_h2" in ``db``.

    A ``None`` value is ignored. Always returns ``None``.
    """
    if value is None:
        return None
    db.put("sgd.no_of_neuron_h2", value)
    return None
def knn_y_axis(value):
    """Store ``value`` under "knn.y_axis_predict" in ``db``.

    A ``None`` value is ignored. Always returns ``None``.
    """
    if value is None:
        return None
    db.put("knn.y_axis_predict", value)
    return None
def nlcl_y_axis(value):
    """Store ``value`` under "nlcl.y_axis" in ``db``.

    A ``None`` value is ignored. Always returns ``None``.
    """
    if value is None:
        return None
    db.put("nlcl.y_axis", value)
    return None
def knn_model_variables(value):
    """Store ``value`` under "knn.model_variables" in ``db``.

    A ``None`` value is ignored. Always returns ``None``.
    """
    if value is None:
        return None
    db.put("knn.model_variables", value)
    return None
def nlcl_model_variables(value):
    """Store ``value`` under "nlcl.model_variables" in ``db``.

    A ``None`` value is ignored. Always returns ``None``.
    """
    if value is None:
        return None
    db.put("nlcl.model_variables", value)
    return None
def sgd_model_variables(value):
    """Store ``value`` under "sgd.model_variables" in ``db``.

    A ``None`` value is ignored. Always returns ``None``.
    """
    if value is None:
        return None
    db.put("sgd.model_variables", value)
    return None
def nlcl_model_train(n_clicks):
    """Train the rectangle-based classifier on two features and plot it.

    Reads the class column, variables and training percentage from ``db``.
    Exactly two variables are required. The train/test frames, summary and
    model are stored under 'nlcl.data_train', 'nlcl.data_test',
    'nlcl.model_summary' and 'nlcl.model_instance'.

    Args:
        n_clicks: Not referenced inside the function body.

    Returns:
        ``""`` when no parameter is set at all, an error message component
        for an invalid training percentage, a feature selection that is not
        exactly two columns, a missing class or a failure during training,
        otherwise a ``html.Div`` with the class counts, summary, boundary
        plot and prediction inputs.
    """
    c = db.get('nlcl.model_class')
    var = db.get('nlcl.model_variables')
    train = db.get('nlcl.model_train')

    if c is None and var is None and train is None:
        return ""
    if train is None or train < 0 or train > 100:
        return common.error_msg('Training % should be between 0 - 100 !!')
    # ``var`` may still be unset here; len(None) used to raise a TypeError.
    if var is None or len(var) != 2:
        return common.error_msg('Select Two Features!!')
    if c is None:
        return common.error_msg('Select Proper Model Parameters!!')

    try:
        cols = [] + var
        cols.append(c)
        df = db.get('nlcl.data')
        df = df[cols]
        train_df, test_df = common.split_df(df, c, train)
        train_df.columns = ['X1', 'X2', 'Class']

        distinct_count_df_total = get_distinct_count_df(df, c, 'Total Count')
        # NOTE(review): train_df was renamed to X1/X2/Class just above but is
        # still queried with ``c`` here - confirm get_distinct_count_df copes
        # when ``c`` is not literally 'Class'.
        distinct_count_df_train = get_distinct_count_df(train_df, c, 'Training Count')
        distinct_count_df_test = get_distinct_count_df(test_df, c, 'Testing Count')
        distinct_count_df = distinct_count_df_total.join(distinct_count_df_train.set_index('Class'), on='Class')
        distinct_count_df = distinct_count_df.join(distinct_count_df_test.set_index('Class'), on='Class')

        model = non_separable_train(train_df)
        print(model)

        summary = {}
        summary['Total Training Data'] = len(train_df)
        summary['Total Testing Data'] = len(test_df)
        summary['Total Number of Features in Dataset'] = len(var)
        summary['Model Accuracy %'] = 'TODO'
        summary['Features'] = str(var)
        summary_df = pd.DataFrame(summary.items(), columns=['Parameters', 'Value'])

        db.put('nlcl.data_train', train_df)
        db.put('nlcl.data_test', test_df)
        db.put('nlcl.model_summary', summary)
        db.put('nlcl.model_instance', model)
    except Exception as e:
        traceback.print_exc()
        return common.error_msg("Exception during training model: " + str(e))

    # Restore the original column names for plotting.
    clazz_col = c
    train_df.columns = cols
    df = train_df
    x_col = var[0]
    y_col = var[1]

    # The model's three entries are drawn as the specific, generic and
    # optimal rectangles respectively.
    x1, y1 = get_rect_coordinates(model[0])
    x2, y2 = get_rect_coordinates(model[1])
    x3, y3 = get_rect_coordinates(model[2])

    # One marker trace per class value.
    graph_data = [
        go.Scatter(
            x=df[df[clazz_col] == clazz][x_col],
            y=df[df[clazz_col] == clazz][y_col],
            text=df[df[clazz_col] == clazz][clazz_col],
            mode='markers',
            opacity=0.8,
            marker={
                'size': 15,
                'line': {'width': 0.5, 'color': 'white'}
            },
            name=clazz
        ) for clazz in df[clazz_col].unique()
    ]
    graph_data.append(go.Scatter(x=x1, y=y1, text = 'Specific Rectangle', name = 'Specific Rectangle'))
    graph_data.append(go.Scatter(x=x3, y=y3, text = 'Optimal Rectangle', name = 'Optimal Rectangle'))
    graph_data.append(go.Scatter(x=x2, y=y2, text = 'Generic Rectangle', name = 'Generic Rectangle'))

    graph = dcc.Graph(
        id='nlcl-x-vs-y-rectangle',
        figure={
            'data': graph_data,
            'layout': dict(
                title='Boundaries & Train Data Set Scatter Plot',
                xaxis={'title': x_col},
                yaxis={'title': y_col},
                margin={'l': 40, 'b': 40},
                legend={'x': 0, 'y': 1},
                hovermode='closest'
            )
        }
    )

    return html.Div([
        html.H2('Class Grouping in Data:'),
        dbc.Table.from_dataframe(distinct_count_df, striped=True, bordered=True, hover=True, style = common.table_style),
        html.H2('Model Parameters & Summary:'),
        dbc.Table.from_dataframe(summary_df, striped=True, bordered=True, hover=True, style = common.table_style),
        html.Br(),
        graph,
        html.H2('Prediction/Classification:'),
        html.P('Features to be Predicted (comma separated): ' + ','.join(var), style = {'font-size': '16px'}),
        dbc.Input(id="nlcl-prediction-data", placeholder=','.join(var), type="text"),
        html.Br(),
        dbc.Button("Predict", color="primary", id = 'nlcl-predict'),
        html.Div([], id = "nlcl-prediction"),
        html.Div([],id = "nlcl-predicted-scatter-plot")
    ])
def knn_x_axis(value):
    """Store ``value`` under "knn.x_axis" in ``db``.

    A ``None`` value is ignored. Always returns ``None``.
    """
    if value is None:
        return None
    db.put("knn.x_axis", value)
    return None
def nlcl_display_selected_file_scatter_plot(value):
    """Load the selected cleaned file and build the page body for it.

    Falls back to the name stored under "nlcl.file" when ``value`` is
    ``None``. The file name and the loaded frame are stored under
    "nlcl.file" and "nlcl.data".

    Returns:
        A prompt message when nothing is selected or stored, otherwise a
        ``html.Div`` with a ten-row preview, a count/mean/standard-deviation
        table, scatter-plot controls, the model-properties section and an
        empty "nlcl-trained-model" container.
    """
    db_value = db.get("nlcl.file")
    if value is None:
        if db_value is None:
            return common.msg("Please select a cleaned file to proceed!!")
        value = db_value

    db.put("nlcl.file", value)
    file = value
    path = FileUtils.path('clean', file)
    df = DataUtils.read_csv(path)
    db.put("nlcl.data", df)

    # Pick the statistics rows by label, not by position: the rows that
    # describe(include='all') returns depend on the column dtypes (the
    # unique/top/freq rows only exist when a non-numeric column is present),
    # so positional selection could show quantiles under the Mean/Standard
    # Deviation labels or fail on a length mismatch.
    stat_labels = {'count': 'Count', 'mean': 'Mean', 'std': 'Standard Deviation'}
    stats = df.describe(include = 'all').reindex(list(stat_labels)).round(5)
    stats.insert(loc=0, column='Statistics', value=list(stat_labels.values()))

    def column_dropdown(component_id):
        # Single-select dropdown listing every column of the loaded frame.
        return dcc.Dropdown(
            id = component_id,
            options=[{'label':col, 'value':col} for col in [*df]],
            value=None,
            multi=False
        )

    centered = {'width': '100%', 'display': 'flex', 'align-items': 'center', 'justify-content': 'center'}

    div = html.Div([
        common.msg("Selected cleaned file: "+ file),
        dbc.Table.from_dataframe(df.head(10), striped=True, bordered=True, hover=True, style = common.table_style),
        html.Div([html.H3("Data Statistics")], style=centered),
        dbc.Table.from_dataframe(stats, striped=True, bordered=True, hover=True, style = common.table_style),
        html.Br(),
        html.Div([html.H2("Scatter Plot")], style=centered),
        dbc.Row([
            dbc.Col([
                dbc.Label("Select Class"),
                column_dropdown('nlcl-class'),
                html.Br(),
                dbc.Label("Select X Axis"),
                column_dropdown('nlcl-x-axis'),
                html.Br(),
                dbc.Label("Select Y Axis"),
                column_dropdown('nlcl-y-axis'),
                html.Br(),
                dbc.Button("Plot", color="primary", id = 'nlcl-scatter-plot-button'),
                html.Div([], id = "nlcl-class-do-nothing"),
                html.Div([], id = "nlcl-x-axis-do-nothing"),
                html.Div([], id = "nlcl-y-axis-do-nothing")
            ], md=2, style = {'margin': '10px', 'font-size': '16px'}),
            dbc.Col([], md=9, id="nlcl-scatter-plot")
        ]),
        html.Br(),
        get_nlcl_model_properties_div(df),
        html.Div([], id = "nlcl-trained-model", style = {'margin': '10px'}),
    ])
    return div
def knn_model_class(value):
    """Store ``value`` under "knn.model_class" in ``db``.

    A ``None`` value is ignored. Always returns ``None``.
    """
    if value is None:
        return None
    db.put("knn.model_class", value)
    return None