def drop_nan_rows(df, threshold, ttype, opstat, display=True):
    """
    * --------------------------------------------------------------------------
    * function : drop rows with nans greater than threshold
    *
    * parms :
    *   df        - dataframe
    *   threshold - threshold value (percent of column count when
    *               ttype == dim.BY_PERCENT, else an absolute nan count)
    *   ttype     - threshold type
    *   opstat    - opStatus object used to report errors
    *   display   - show the running clock while processing
    *
    * returns :
    *   [number of rows dropped, resulting dataframe length]
    * --------------------------------------------------------
    """
    import math

    if (display):
        clock = RunningClock()
        clock.start()

    # initialized before the try block so the final return statement is
    # always valid; previously an exception raised before the drop loop
    # left dropcount undefined and the return raised NameError
    dropcount = 0

    try:
        # resolve the threshold to an absolute nans-per-row count
        if (ttype == dim.BY_PERCENT):
            thold = math.floor(len(df.columns) * (float(threshold) * 0.01))
        else:
            thold = math.floor(float(threshold))

        nanslist = df.isnull().sum(axis=1).tolist()

        # keep rows whose nan count is below the threshold; build the
        # boolean mask in a new list instead of mutating nanslist in place
        # (the original aliased criteria to nanslist and overwrote it)
        criteria = [nans < thold for nans in nanslist]
        dropcount = criteria.count(False)

        if (dropcount > 0):
            df = df[criteria]
            cfg.set_dfc_dataframe_df(
                cfg.get_config_value(cfg.CURRENT_INSPECTION_DF), df)

    except Exception as e:
        opstat.store_exception("Error dropping nan rows\n ", e)
        display_exception(opstat)

    if (display):
        clock.stop()

    # make scriptable
    # NOTE(review): the scripted call passes only (threshold, False), which
    # does not match this function's signature — confirm the signature of
    # drop_nan_rows exported by data_inspection_control
    add_to_script([
        "# Drop NAN Rows ",
        "from dfcleanser.data_inspection.data_inspection_control import drop_nan_rows",
        "drop_nan_rows(" + str(threshold) + ",False)"
    ], opstat)

    return ([dropcount, len(df)])
def export_sql_table(parms, display=True):
    """
    * --------------------------------------------------------------------------
    * function : export a sql table into pandas dataframe
    *
    * parms :
    *   parms   - sql parms
    *   display - display flag
    *
    * returns : N/A
    * --------------------------------------------------------
    """
    opstat = opStatus()

    dew.display_export_main_taskbar()
    print("export_sql_table", parms)

    save_data_export_start()

    clock = RunningClock()
    clock.start()

    db_id = cfg.get_config_value(cfg.CURRENT_DB_ID_KEY)
    connector_dict = set_dbcon_dict(db_id, get_stored_con_Parms(db_id))

    table_parms = dew.get_sqltable_export_inputs(parms)

    (export_notes, opstat) = export_pandas_sqltable(
        table_parms, connector_dict, dew.pandas_export_sqltable_id)

    clock.stop()

    if (not opstat.get_status()):
        display_exception(opstat)
    else:
        # stringify each parm, keep the first 8 and append the export notes
        table_parms = [get_string_value(p) for p in table_parms][0:8]
        table_parms.append(export_notes)

        table_labels = dew.pandas_export_sqltable_labelList[0:8]
        table_labels.append("DB Connector String")

        display_data_export_parms("Pandas SQL Table Export Parms",
                                  table_labels, table_parms,
                                  cfg.DataExport_ID, table_parms[1], True)
def test_export_sql_db_connector(driverid, sqlinputparms):
    """
    * --------------------------------------------------------------------------
    * function : test the sql db connector
    *
    * parms :
    *   driverid      - pandas export identifier
    *   sqlinputparms - connection string
    *
    * returns : N/A
    * --------------------------------------------------------
    """
    opstat = opStatus()

    try:
        export_test_sql_db_connector(driverid, sqlinputparms)
    except Exception as err:
        opstat.store_exception("DB Connection failed ", err)
        display_exception(opstat)
def save_datastructures_file(self, dstype, creator):
    """
    * --------------------------------------------------------------------------
    * function : persist one of the four data-structure stores to json
    *
    * parms :
    *   dstype  - store type (DICT_ID selects a dict store, else a list store)
    *   creator - DFC_CREATED selects the dfc store, else the user store
    *
    * returns : N/A (errors are reported via display_exception)
    * --------------------------------------------------------
    """
    try:
        # pick the store matching the (type, creator) pair up front instead
        # of duplicating the json.dump call in four nested branches
        if (dstype == DICT_ID):
            store = self.dictStore if (creator == DFC_CREATED) else self.userdictStore
        else:
            store = self.listStore if (creator == DFC_CREATED) else self.userlistStore

        # the with-statement closes the file on exit; the original's
        # explicit close() inside the with block was redundant
        with open(self.get_datastructures_file_name(dstype, creator), 'w') as datastructures_file:
            json.dump(store, datastructures_file)

    except Exception as e:
        opstat = opStatus()
        opstat.store_exception(
            "Unable to save file " + self.get_datastructures_file_name(dstype, creator), e)
        display_exception(opstat)
def export_test_sql_db_connector(driverid, sqlinputparms):
    """
    * --------------------------------------------------------------------------
    * function : test the export sql db connectors (native and sqlalchemy)
    *
    * parms :
    *   driverid      - driver id
    *   sqlinputparms - connection string
    *
    * returns : N/A (errors are reported via display_exception)
    * --------------------------------------------------------
    """
    # created before the try block so the except handler can always use it;
    # previously opstat was only assigned inside the try, so a failure in
    # the import or the first connector test raised NameError in the handler
    opstat = opStatus()

    try:
        from dfcleanser.common.db_utils import test_db_connector, SQL_EXPORT, NATIVE, SQLALCHEMY

        # NOTE(review): the NATIVE result is overwritten by the SQLALCHEMY
        # result below — confirm whether both statuses should be reported
        opstat = test_db_connector(cfg.get_config_value(cfg.CURRENT_DB_ID_KEY),
                                   NATIVE, sqlinputparms, SQL_EXPORT)
        opstat = test_db_connector(cfg.get_config_value(cfg.CURRENT_DB_ID_KEY),
                                   SQLALCHEMY, sqlinputparms, SQL_EXPORT)

    except Exception as e:
        opstat.store_exception("DB Connection failed ", e)
        display_exception(opstat)
def load_datastructures_file(self, dstype, creator):
    """
    * --------------------------------------------------------------------------
    * function : load one of the four data-structure stores from json
    *
    * parms :
    *   dstype  - store type (DICT_ID selects a dict store, else a list store)
    *   creator - DFC_CREATED selects the dfc store, else the user store
    *
    * returns : N/A (on failure the selected store is reset to {} and the
    *           error is reported via display_exception)
    * --------------------------------------------------------
    """
    # resolve which attribute the (type, creator) pair maps to, so both the
    # success and failure paths assign through a single setattr
    if (dstype == DICT_ID):
        store_attr = "dictStore" if (creator == DFC_CREATED) else "userdictStore"
    else:
        store_attr = "listStore" if (creator == DFC_CREATED) else "userlistStore"

    try:
        with open(self.get_datastructures_file_name(dstype, creator), 'r') as datastructures_file:
            setattr(self, store_attr, json.load(datastructures_file))

    except Exception as e:
        # reset the selected store to empty on any load failure
        setattr(self, store_attr, {})

        opstat = opStatus()
        opstat.store_exception(
            "Unable to load common file : " + self.get_datastructures_file_name(dstype, creator), e)
        display_exception(opstat)
def display_inspect_categories():
    """
    * --------------------------------------------------------------------------
    * function : display the inspect categories option
    *
    * parms :
    *
    * returns :
    *   N/A
    * --------------------------------------------------------
    """
    opstat = opStatus()

    clock = RunningClock()
    clock.start()

    try:
        category_table = dcTable("Category Columns", "catcolsTable",
                                 cfg.DataInspection_ID)
        candidate_table = dcTable("Category Candidate Columns",
                                  "catcandcolsTable", cfg.DataInspection_ID)

        numcats, numcands = diw.display_df_categories(
            cfg.get_current_chapter_df(cfg.DataInspection_ID),
            category_table, candidate_table)

    except Exception as e:
        opstat.store_exception("Error displaying category data\n ", e)

    clock.stop()

    if (not opstat.get_status()):
        display_exception(opstat)
def display_script_exception(e):
    """
    * --------------------------------------------------------------------------
    * function : display an exception raised while running a script
    *
    * parms :
    *   e - the caught exception
    *
    * returns :
    *   empty tuple
    * --------------------------------------------------------
    """
    status = opStatus()
    status.store_exception("Unable to run script", e)
    display_exception(status)

    return ()
def display_inspect_cols(parms):
    """
    * --------------------------------------------------------------------------
    * function : display the inspect cols option
    *
    * parms :
    *   parms - column name to inspect, or None for the first column
    *
    * returns :
    *   N/A
    * --------------------------------------------------------
    """
    opstat = opStatus()

    clock = RunningClock()
    clock.start()

    try:
        df = cfg.get_current_chapter_df(cfg.DataInspection_ID)
        colnames = df.columns.tolist()

        # default to the first column when no column was requested
        if (not (parms is None)):
            colname = parms
        else:
            colname = colnames[0]

        # select-widget descriptor for the column-chooser dropdown
        cnames = {
            'default': colname,
            'list': colnames,
            "callback": "change_inspect_cols_col",
            "size": 10
        }

        # numeric and non-numeric columns use different input forms
        if (is_numeric_col(df, colname)):
            coldetails_form = InputForm(diw.inspect_col_input_id,
                                        diw.inspect_col_input_idList,
                                        diw.inspect_col_input_labelList,
                                        diw.inspect_col_input_typeList,
                                        diw.inspect_col_input_placeholderList,
                                        diw.inspect_col_input_jsList,
                                        diw.inspect_col_input_reqList)
        else:
            coldetails_form = InputForm(
                diw.inspect_nn_col_input_id,
                diw.inspect_nn_col_input_idList,
                diw.inspect_nn_col_input_labelList,
                diw.inspect_nn_col_input_typeList,
                diw.inspect_nn_col_input_placeholderList,
                diw.inspect_nn_col_input_jsList,
                diw.inspect_nn_col_input_reqList)

        selectDicts = []
        selectDicts.append(cnames)
        get_select_defaults(coldetails_form, diw.inspect_col_input_id,
                            diw.inspect_col_input_idList,
                            diw.inspect_col_input_typeList, selectDicts)

        coldetails_form.set_shortForm(True)
        coldetails_form.set_fullparms(True)

        # button geometry depends on display mode (inline vs pop-up) and on
        # whether the column is numeric (different button counts per form)
        if (cfg.get_dfc_mode() == cfg.INLINE_MODE):
            coldetails_form.set_gridwidth(360)
            if (is_numeric_col(df, colname)):
                coldetails_form.set_buttonstyle({
                    "font-size": 12, "height": 75,
                    "width": 85, "left-margin": 2
                })
            else:
                coldetails_form.set_buttonstyle({
                    "font-size": 12, "height": 75,
                    "width": 85, "left-margin": 75
                })
        else:
            coldetails_form.set_gridwidth(480)
            if (is_numeric_col(df, colname)):
                coldetails_form.set_buttonstyle({
                    "font-size": 12, "height": 75,
                    "width": 110, "left-margin": 2
                })
            else:
                coldetails_form.set_buttonstyle({
                    "font-size": 12, "height": 75,
                    "width": 110, "left-margin": 110
                })

        coldetails_html = coldetails_form.get_html()

        from dfcleanser.data_cleansing.data_cleansing_widgets import display_col_stats
        col_stats_html = display_col_stats(df, colname, False, True)

        # stats on the left, input form on the right
        gridclasses = ["dfc-left", "dfc-right"]
        gridhtmls = [col_stats_html, coldetails_html]

        if (cfg.get_dfc_mode() == cfg.INLINE_MODE):
            display_generic_grid("df-inspection-column-data-wrapper",
                                 gridclasses, gridhtmls)
        else:
            display_generic_grid("df-inspection-pop-up-column-data-wrapper",
                                 gridclasses, gridhtmls)

    except Exception as e:
        opstat.store_exception("Error displaying column data\n ", e)

    clock.stop()

    if (not (opstat.get_status())):
        display_exception(opstat)
def display_inspect_rows(rowid=0):
    """
    * --------------------------------------------------------------------------
    * function : display the inspect rows option
    *
    * parms :
    *   rowid - first row of the 200-row window to display
    *
    * returns :
    *   N/A
    * --------------------------------------------------------
    """
    opstat = opStatus()

    clock = RunningClock()
    clock.start()

    try:
        print("\n")

        from dfcleanser.data_transform.data_transform_dataframe_widgets import display_current_df_index
        display_current_df_index(
            cfg.get_current_chapter_df(cfg.DataInspection_ID),
            cfg.get_current_chapter_dfc_df_title(cfg.DataInspection_ID),
            0, True)

        row_stats_html = diw.display_row_stats(
            cfg.get_current_chapter_df(cfg.DataInspection_ID),
            cfg.get_config_value(cfg.CURRENT_INSPECTION_DF), False)

        # show a 200-row window starting at rowid
        sample_row_html = dim.display_df_rows(
            cfg.get_current_chapter_df(cfg.DataInspection_ID), rowid, 200)

        rows_openexcel_tb = diw.get_inspection_openexcel_taskbar()
        rows_openexcel_tb.set_gridwidth(620)
        rows_openexcel_tb.set_customstyle({
            "font-size": 13,
            "height": 90,
            "width": 120,
            "left-margin": 10
        })

        rows_openexcel_html = rows_openexcel_tb.get_html()
        rows_openexcel_html = (rows_openexcel_html + "<br>")

        # remember the window start so scroll up/down can page from here
        cfg.set_config_value(cfg.CURRENT_SCROLL_ROW_KEY, rowid)

        gridclasses = ["dfc-top", "dfc-bottom", "dfc-footer"]
        gridhtmls = [row_stats_html, sample_row_html, rows_openexcel_html]

        if (cfg.get_dfc_mode() == cfg.INLINE_MODE):
            display_generic_grid("df-inspection-row-data-wrapper",
                                 gridclasses, gridhtmls)
        else:
            display_generic_grid("df-inspection-row-data-pop-up-wrapper",
                                 gridclasses, gridhtmls)

    except Exception as e:
        opstat.store_exception("Error displaying row data\n ", e)
        display_exception(opstat)

        # NOTE(review): leftover debug output — prints the raw traceback in
        # addition to the displayed exception; confirm whether to keep
        import traceback
        traceback.print_exc()

    clock.stop()
def display_data_inspection(option, parms=None):
    """
    * --------------------------------------------------------------------------
    * function : main data inspection processing
    *
    * parms :
    *   option - function option
    *   parms  - associated parms
    *
    * returns :
    *   N/A
    * --------------------------------------------------------
    """
    from IPython.display import clear_output
    clear_output()

    opstat = opStatus()

    # make sure this chapter's input widgets exist before dispatching
    from dfcleanser.common.html_widgets import define_inputs, are_owner_inputs_defined
    if (not (are_owner_inputs_defined(cfg.DataInspection_ID))):
        define_inputs(cfg.DataInspection_ID, diw.datainspection_inputs)

    if (option == dim.MAIN_OPTION):
        drop_working_df()
        diw.display_dfc_inspection_main()
        clear_data_inspection_data()
    else:
        diw.display_inspection_main_taskbar()

    if (cfg.is_a_dfc_dataframe_loaded()):

        # the display options carry a dataframe selection in parms[0];
        # record it as the current inspection df before dispatching
        if ((option == dim.DISPLAY_DATATYPES_OPTION) or
                (option == dim.DISPLAY_NANS_OPTION) or
                (option == dim.DISPLAY_ROWS_OPTION) or
                (option == dim.DISPLAY_COLS_OPTION) or
                (option == dim.DISPLAY_CATEGORIES_OPTION)):

            fparms = get_parms_for_input(parms[0],
                                         diw.data_inspection_df_input_idList)
            if (len(fparms) > 0):
                cfg.set_config_value(cfg.CURRENT_INSPECTION_DF, fparms[0])

            if (not (option == dim.DISPLAY_ROWS_OPTION)):
                drop_working_df()

        if ((option == dim.DISPLAY_DATATYPES_OPTION) or
                (option == dim.DISPLAY_FULL_COLUMN_NAMES)):
            df_data_info = dim.get_df_datatypes_data(
                cfg.get_current_chapter_df(cfg.DataInspection_ID))
            display_inspect_datatypes(option, df_data_info)

        elif (option == dim.DISPLAY_NANS_OPTION):
            display_inspect_nans()

        elif (option == dim.DISPLAY_ROWS_OPTION):
            display_inspect_rows()

        elif (option == dim.DISPLAY_COLS_OPTION):
            # optional column name in parms[1]
            if (len(parms) > 1):
                display_inspect_cols(parms[1])
            else:
                display_inspect_cols(None)

        elif (option == dim.DISPLAY_CATEGORIES_OPTION):
            display_inspect_categories()

        elif ((option == dim.DROP_ROW_NANS_OPTION) or
                (option == dim.DROP_COL_NANS_OPTION)):

            # parms[0] is the threshold type, parms[1] the input form values
            thresholdType = parms[0]

            if (option == dim.DROP_ROW_NANS_OPTION):
                fparms = get_parms_for_input(parms[1],
                                             diw.drop_rows_input_idList)
            else:
                fparms = get_parms_for_input(parms[1],
                                             diw.drop_columns_input_idList)

            # validate the threshold value; any failure flags opstat and
            # leaves threshold as None so no drop is attempted below
            if (len(fparms) > 0):
                try:
                    threshold = int(fparms[0])
                except:
                    opstat.set_status(False)
                    if (option == dim.DROP_ROW_NANS_OPTION):
                        opstat.set_errorMsg("Drop Nan Rows Threshold value '" +
                                            fparms[0] + "' is invalid")
                    else:
                        opstat.set_errorMsg("Drop Nan Cols Threshold value '" +
                                            fparms[0] + "' is invalid")
                    threshold = None
            else:
                opstat.set_status(False)
                if (option == dim.DROP_ROW_NANS_OPTION):
                    opstat.set_errorMsg(
                        "Drop Nan Rows Threshold value is not defined")
                else:
                    opstat.set_errorMsg(
                        "Drop Nan Cols Threshold value is not defined")
                threshold = None

            if (option == dim.DROP_ROW_NANS_OPTION):
                if (opstat.get_status()):
                    dropstats = drop_nan_rows(
                        cfg.get_current_chapter_df(cfg.DataInspection_ID),
                        threshold, thresholdType, opstat)
                    if (not (opstat.get_status())):
                        display_exception(opstat)
                    else:
                        if (dropstats[0] > 0):
                            display_status(
                                str(dropstats[0]) +
                                " Nan Rows Dropped Successfully")
                        else:
                            display_status(
                                "No Rows matching threshold were dropped")
            else:
                if (opstat.get_status()):
                    numcolsdropped = drop_nan_cols(
                        cfg.get_current_chapter_df(cfg.DataInspection_ID),
                        threshold, thresholdType, opstat)
                    if (not (opstat.get_status())):
                        display_exception(opstat)
                    else:
                        if (numcolsdropped > 0):
                            display_status(
                                str(numcolsdropped) +
                                " Columns with Nans Dropped Successfully")
                        else:
                            display_status(
                                " No Columns matching threshold were dropped")

        elif (option == dim.DISPLAY_ROW_OPTION):
            display_inspect_rows()

        elif (option == dim.DISPLAY_COL_GRAPHS):
            display_inspect_graphs(parms)

        elif (option == dim.DISPLAY_COL_OUTLIERS):
            display_inspect_outliers(parms[0])

        elif (option == dim.DISPLAY_SCROLL_TO_DF_ROW):
            diw.display_scroll_to_row()

        elif (option == dim.PROCESS_SCROLL_TO_DF_ROW):
            opstat = opStatus()
            df = cfg.get_current_chapter_df(cfg.DataInspection_ID)
            retparms = get_row_id_for_df(df, parms,
                                         diw.scroll_df_rows_input_idList,
                                         opstat)
            if (opstat.get_status()):
                # NOTE(review): both branches of the retparms[1] check make
                # the identical call — confirm whether a distinct behavior
                # was intended for retparms[1] != 0
                if (retparms[1] == 0):
                    display_inspect_rows(retparms[0])
                else:
                    display_inspect_rows(retparms[0])
            else:
                diw.display_scroll_to_row()
                display_exception(opstat)

        elif (option == dim.SCROLL_DF_ROWS_DOWN):
            # page down by the 200-row window size, clamped to the df length
            new_row_id = cfg.get_config_value(cfg.CURRENT_SCROLL_ROW_KEY)
            if (new_row_id is None):
                new_row_id = 0
            else:
                new_row_id = new_row_id + 200
                df = cfg.get_current_chapter_df(cfg.DataInspection_ID)
                if (new_row_id > len(df)):
                    # past the end - stay on the current window
                    new_row_id = cfg.get_config_value(
                        cfg.CURRENT_SCROLL_ROW_KEY)
            display_inspect_rows(new_row_id)

        elif (option == dim.SCROLL_DF_ROWS_UP):
            # page up by the 200-row window size, clamped at row 0
            new_row_id = cfg.get_config_value(cfg.CURRENT_SCROLL_ROW_KEY)
            if (new_row_id is None):
                new_row_id = 0
            else:
                new_row_id = new_row_id - 200
                if (new_row_id < 0):
                    new_row_id = 0
            display_inspect_rows(new_row_id)

        elif (option == dim.DISPLAY_DF_ROW):
            print("dim.DISPLAY_DF_ROW")

        elif (option == dim.DISPLAY_DF_ROW_REMOTE):
            # called from another chapter : adopt that chapter's current
            # dataframe as the inspection dataframe, then show its rows
            chapterid = parms[0]
            #print("chapterId",chapterid)

            new_config_df = None

            if (chapterid == cfg.DataInspection_ID):
                new_config_df = cfg.get_config_value(cfg.CURRENT_INSPECTION_DF)
            elif (chapterid == cfg.DataCleansing_ID):
                new_config_df = cfg.get_config_value(cfg.CURRENT_CLEANSE_DF)
            elif (chapterid == cfg.DataTransform_ID):
                new_config_df = cfg.get_config_value(cfg.CURRENT_TRANSFORM_DF)
            elif (chapterid == cfg.DataExport_ID):
                new_config_df = cfg.get_config_value(cfg.CURRENT_EXPORT_DF)
            elif (chapterid == cfg.DataImport_ID):
                new_config_df = cfg.get_config_value(cfg.CURRENT_IMPORT_DF)
            elif (chapterid == cfg.SWGeocodeUtility_ID):
                new_config_df = cfg.get_config_value(cfg.CURRENT_GEOCODE_DF)
            elif (chapterid == cfg.SWDFSubsetUtility_ID):
                new_config_df = cfg.get_config_value(cfg.CURRENT_SUBSET_DF)

            cfg.set_config_value(cfg.CURRENT_INSPECTION_DF, new_config_df)
            display_inspect_rows()

    else:
        # no dataframe loaded : clear the selection and tell the user
        cfg.drop_config_value(cfg.CURRENT_INSPECTION_DF)
        if (not (option == dim.MAIN_OPTION)):
            cfg.display_no_dfs(cfg.DataInspection_ID)

    from dfcleanser.common.display_utils import display_pop_up_buffer
    display_pop_up_buffer()
def display_inspect_datatypes(option, df_data_info):
    """
    * --------------------------------------------------------------------------
    * function : display the datatypes option
    *
    * parms :
    *   option       - display option (DISPLAY_FULL_COLUMN_NAMES suppresses
    *                  the datatypes table)
    *   df_data_info - [datatypes, type counts, per-column data]
    *
    * returns :
    *   N/A
    * --------------------------------------------------------
    """
    opstat = opStatus()

    # single import; the original imported matplotlib.pyplot a second time
    # inside the try block
    import matplotlib.pyplot as plt

    clock = RunningClock()
    clock.start()

    try:
        if (not (option == dim.DISPLAY_FULL_COLUMN_NAMES)):
            data_types_table = dcTable("Column Data Types", "datatypesTable",
                                       cfg.DataInspection_ID)
        else:
            data_types_table = None

        data_types_html = diw.display_df_datatypes(data_types_table,
                                                   df_data_info[0],
                                                   df_data_info[1],
                                                   df_data_info[2],
                                                   option, False)

        gridclasses = ["dfc-main"]
        gridhtmls = [data_types_html]

        if (cfg.get_dfc_mode() == cfg.INLINE_MODE):
            display_generic_grid("df-inspection-wrapper",
                                 gridclasses, gridhtmls)
        else:
            display_generic_grid("df-inspection-pop-up-wrapper",
                                 gridclasses, gridhtmls)

        print("\n")

        import numpy as np

        font = {'fontsize': 14}
        font2 = {'fontsize': 18}

        # bar-chart labels : one per datatype, with the datetime module
        # prefix stripped for readability
        objects = [str(dtype).replace("datetime.", "")
                   for dtype in df_data_info[0]]

        y_pos = np.arange(len(objects))

        plt.bar(y_pos, df_data_info[1], align='center', alpha=0.5,
                color='#428bca')
        plt.xticks(y_pos, objects, rotation='vertical')
        plt.ylabel('Type Counts', fontdict=font)
        plt.xlabel('Data Types', fontdict=font)
        plt.title('Column Data Types', fontdict=font2)
        plt.show()

    except Exception as e:
        opstat.store_exception("Error displaying data types\n ", e)

    clock.stop()

    if (not (opstat.get_status())):
        display_exception(opstat)
def display_dfsubset_utility(optionId, parms=None):
    """
    * ---------------------------------------------------------
    * function : main subset utility control
    *
    * parms :
    *   optionId - function to run
    *   parms    - parms to run function
    *
    * returns :
    *   NA
    * --------------------------------------------------------
    """
    if (cfg.is_a_dfc_dataframe_loaded()):

        from IPython.display import clear_output
        clear_output()

        # make sure this chapter's input widgets exist before dispatching
        from dfcleanser.common.html_widgets import define_inputs, are_owner_inputs_defined
        if (not (are_owner_inputs_defined(cfg.SWDFSubsetUtility_ID))):
            define_inputs(cfg.SWDFSubsetUtility_ID, swsw.SWUtility_subset_inputs)

        if (optionId == swsm.DISPLAY_MAIN):
            swsw.get_dfsubset_main_taskbar()
            clear_sw_utility_dfsubsetdata()
            cfg.display_data_select_df(cfg.SWDFSubsetUtility_ID)
            swsm.clear_current_subset_data()

        elif (optionId == swsm.DISPLAY_GET_SUBSET):
            swsm.clear_current_subset_data()

            # debug-only dump of the freshly cleared sequence state
            if (DEBUG_SUBSET):
                swsm.set_current_subset_sequence(swsm.dfc_subset_sequence())
                print("\ncurrent_subset_sequence\n")
                print("input_df_title",
                      swsm.get_current_subset_sequence().get_input_df_title())
                print("get_sequence_title",
                      swsm.get_current_subset_sequence().get_sequence_title())
                print("get_sequence_steps",
                      swsm.get_current_subset_sequence().get_sequence_steps())
                if (not (swsm.get_current_subset_sequence().get_sequence_steps() is None)):
                    print("get_total_sequence_steps",
                          swsm.get_current_subset_sequence().get_total_sequence_steps())
                print("get_output_csv",
                      swsm.get_current_subset_sequence().get_output_csv())
                print("get_output_dfc_df_title",
                      swsm.get_current_subset_sequence().get_output_dfc_df_title())

            swsw.display_df_subset_setup()

            if (DEBUG_SUBSET):
                print("DISPLAY_GET_SUBSET", parms)
                print("DISPLAY_GET_SUBSET : clear data")
                print(swsm.get_current_subset_sequence())
                print(swsm.get_current_subset_df())
                print(swsm.get_current_subset_step())
                print("new_sequence")
                swsm.dump_current_step()
                swsm.dump_current_sequence()

        elif (optionId == swsm.PROCESS_GET_SUBSET):
            # commit the in-progress step (if any) to the sequence before
            # building the next one
            current_step = swsm.get_current_subset_step()
            if (not (current_step is None)):
                current_sequence = swsm.get_current_subset_sequence()
                current_sequence.add_step_to_sequence_steps(current_step)

            fparms = get_parms_for_input(parms, swsw.get_subset_input_idList)

            if (len(fparms) > 0):
                df_title = fparms[0]
                df = cfg.get_dfc_dataframe_df(df_title)
                col_names = fparms[1]
                col_action = fparms[3]

                # apply the keep/drop column action to get the new subset
                new_subset_df = drop_add_cols(col_names, col_action, df)
                new_subset_df_title = df_title

                new_subset_step = swsm.dfc_subset_step(new_subset_df_title,
                                                       col_names, col_action)
                swsm.set_current_subset_step(new_subset_step)
                swsm.set_current_subset_df(new_subset_df)

                swsw.display_df_criteria(new_subset_df_title, new_subset_df)

            if (DEBUG_SUBSET):
                print("\nPROCESS_GET_SUBSET\n ", parms, "\n ", fparms)
                swsm.dump_current_step()
                swsm.dump_current_sequence()

        elif (optionId == swsm.DISPLAY_SAVED_SUBSET):
            swsw.display_saved_subset_sequences()

            if (DEBUG_SUBSET):
                print("\nDISPLAY_SAVED_SUBSET", parms)

        elif (optionId == swsm.PROCESS_RUN_CRITERIA):
            opstat = opStatus()

            fparms = get_parms_for_input(parms,
                                         swsw.get_subset_criteria_input_idList)

            # default the output title when the user did not supply one
            subset_title = fparms[0]
            if (len(subset_title) == 0):
                current_sequence = swsm.get_current_subset_sequence()
                total_steps = current_sequence.get_total_sequence_steps()
                current_step = swsm.get_current_subset_step()
                subset_title = (current_step.get_input_subset_df_title() +
                                "_subset_" + str(total_steps + 1))

            criteria = fparms[2]

            if (len(criteria) > 0):
                try:
                    clock = RunningClock()
                    clock.start()

                    # the criteria string is executed as python code wrapped
                    # in the module's preamble/postamble
                    # NOTE(review): exec of user-entered criteria — trusted
                    # notebook input by design, but worth confirming
                    final_criteria = (swsm.starting_criteria_preamble +
                                      criteria +
                                      swsm.starting_criteria_postamble)
                    exec(final_criteria)

                    current_step = swsm.get_current_subset_step()
                    current_step.set_criteria(criteria)
                    current_step.set_output_subset_df_title(subset_title)

                    clock.stop()

                except Exception as e:
                    opstat.store_exception("Error running df_criteria " + criteria, e)
                    clock.stop()

                if (opstat.get_status()):
                    swsw.display_process_subset()
                else:
                    display_exception(opstat)

            if (DEBUG_SUBSET):
                print("PROCESS_RUN_CRITERIA : End")
                swsm.dump_current_step()
                swsm.dump_current_sequence()

        elif (optionId == swsm.DISPLAY_SAVE_SUBSET):
            fparms = get_parms_for_input(parms, swsw.get_subset_run_input_idList)

            # commit the current step before saving
            current_sequence = swsm.get_current_subset_sequence()
            current_step = swsm.get_current_subset_step()
            current_sequence.add_step_to_sequence_steps(current_step)

            if (len(fparms) > 0):
                df_title = fparms[0]
                df = swsm.get_current_subset_df()
                col_names = fparms[1]
                col_action = fparms[3]

                new_subset_df = drop_add_cols(col_names, col_action, df)

                swsw.display_save_subset(df_title, new_subset_df)

            if (DEBUG_SUBSET):
                print("DISPLAY_SAVE_SUBSET", parms, fparms)
                swsm.dump_current_step()
                swsm.dump_current_sequence()

        elif (optionId == swsm.DISPLAY_SAVE_AND_GET_SUBSET):
            fparms = get_parms_for_input(parms, swsw.get_subset_run_input_idList)

            # commit the current step, then start a new one from its output
            current_sequence = swsm.get_current_subset_sequence()
            current_step = swsm.get_current_subset_step()
            current_sequence.add_step_to_sequence_steps(current_step)

            if (len(fparms) > 0):
                df_title = fparms[0]
                col_names = fparms[1]
                col_action = fparms[3]

                new_subset_step = swsm.dfc_subset_step(df_title, col_names,
                                                       col_action)

                df = swsm.get_current_subset_df()
                new_subset_df = drop_add_cols(col_names, col_action, df)

                swsm.set_current_subset_df(new_subset_df)
                swsm.set_current_subset_step(new_subset_step)

                swsw.display_df_criteria(df_title, new_subset_df)

            if (DEBUG_SUBSET):
                print("PROCESS_SAVE_AND_GET_SUBSET", parms, fparms)
                swsm.dump_current_step()
                swsm.dump_current_sequence()

        elif (optionId == swsm.PROCESS_SAVE_SUBSET):
            save_subset_run(parms, 0)

        elif (optionId == swsm.PROCESS_SUBSET_SEQUENCE):
            opstat = opStatus()

            fparms = get_parms_for_input(parms,
                                         swsw.get_subset_sequences_input_idList)

            sequence = fparms[0]
            run_option = fparms[1]

            saved_sequence = swsm.get_subset_sequence(sequence)
            first_step = saved_sequence.get_sequence_step(0)
            df_title = first_step.get_input_subset_df_title()
            df = cfg.get_dfc_dataframe_df(df_title)

            if (df is None):
                # the sequence's starting df is not loaded : bail out
                swsw.get_dfsubset_main_taskbar()
                cfg.display_data_select_df(cfg.SWDFSubsetUtility_ID)
                opstat.set_status(False)
                opstat.set_errorMsg("subset sequence starting df '" +
                                    df_title + "' is not currently loaded in dfc")
                display_exception(opstat)
            else:
                if (run_option == "Auto Run"):
                    # run every step of the saved sequence unattended
                    total_steps = saved_sequence.get_total_sequence_steps()
                    swsm.set_current_subset_df(df)

                    for i in range(total_steps):
                        current_step = saved_sequence.get_sequence_step(i)

                        current_columns = current_step.get_col_names_list()
                        columns_action = current_step.get_keep_drop_flag()
                        criteria = current_step.get_criteria()
                        output_df_title = current_step.get_output_subset_df_title()

                        current_df = swsm.get_current_subset_df()

                        if (len(current_columns) > 0):
                            # build the drop list from the keep/drop flag
                            colnames = list(current_df.columns)
                            drop_columns = []

                            # NOTE(review): this inner loop reuses the outer
                            # loop variable i — harmless to the outer range
                            # iteration in python but confusing; confirm
                            for i in range(len(colnames)):
                                if (columns_action == "Keep"):
                                    if (not (colnames[i] in current_columns)):
                                        drop_columns.append(colnames[i])
                                else:
                                    if (colnames[i] in current_columns):
                                        drop_columns.append(colnames[i])

                            if (len(drop_columns) > 0):
                                try:
                                    current_df.drop(drop_columns, axis=1,
                                                    inplace=True)
                                except:
                                    opstat.set_status(False)
                                    opstat.set_errorMsg("Unable to drop columns from subset dataframe")

                            swsm.set_current_subset_df(current_df)

                        # run the step's criteria against the current df
                        try:
                            current_df = swsm.get_current_subset_df()
                            exec(criteria + swsm.starting_criteria_postamble)
                            current_df = swsm.get_current_subset_df()
                        except Exception as e:
                            opstat.store_exception("Error running subset sequence '" + sequence + "'", e)

                    swsw.display_save_subset(output_df_title,
                                             swsm.get_current_subset_df(),
                                             True)
                else:
                    # manual run : present the first step for confirmation
                    total_steps = saved_sequence.get_total_sequence_steps()
                    swsm.set_current_subset_df(df)

                    current_step = saved_sequence.get_sequence_step(0)
                    current_df_title = current_step.get_input_subset_df_title()
                    current_columns = current_step.get_col_names_list()
                    columns_action = current_step.get_keep_drop_flag()

                    swsw.display_manual_df_subset_setup(
                        saved_sequence.get_sequence_title(),
                        current_df_title, current_columns,
                        columns_action, 0)

                    swsm.set_current_subset_step_id(0)
                    swsm.set_current_subset_sequence(saved_sequence)

        elif (optionId == swsm.PROCESS_GET_NEXT_SUBSET):
            fparms = get_parms_for_input(parms, swsw.get_manual_input_idList)

            # fparms[1] arrives as a stringified list : "[a,b,c]"
            collist = fparms[1]
            collist = collist.lstrip("[")
            collist = collist.rstrip("]")
            collist = collist.split(",")

            keep_drop = fparms[3]

            saved_sequence = swsm.get_current_subset_sequence()
            total_steps = saved_sequence.get_total_sequence_steps()
            current_step_id = swsm.get_current_subset_step_id()

            if (current_step_id < total_steps):
                current_step = saved_sequence.get_sequence_step(
                    swsm.get_current_subset_step_id())
            else:
                # past the saved steps : use the user-edited current step
                swsm.set_current_subset_step_col_names_list(collist)
                swsm.set_current_subset_keep_drop_flag(keep_drop)
                current_step = swsm.get_current_subset_step()
                swsm.dump_current_step()

            swsm.set_current_subset_step(current_step)

            current_df_title = current_step.get_input_subset_df_title()
            current_df = swsm.get_current_subset_df()
            current_columns = current_step.get_col_names_list()
            columns_action = current_step.get_keep_drop_flag()
            criteria = current_step.get_criteria()
            output_df_title = current_step.get_output_subset_df_title()

            if (len(current_columns) > 0):
                colnames = list(current_df.columns)
                drop_columns = []

                for i in range(len(colnames)):
                    if (columns_action == "Keep"):
                        if (not (colnames[i] in current_columns)):
                            drop_columns.append(colnames[i])
                    else:
                        if (colnames[i] in current_columns):
                            drop_columns.append(colnames[i])

                if (len(drop_columns) > 0):
                    try:
                        current_df.drop(drop_columns, axis=1, inplace=True)
                    except:
                        # NOTE(review): opstat is not defined in this branch —
                        # this except path would raise NameError; confirm an
                        # opStatus() should be created at the top of the branch
                        opstat.set_status(False)
                        opstat.set_errorMsg("Unable to drop columns from subset dataframe")

            swsw.display_next_criteria(current_df_title, current_df,
                                       criteria, output_df_title)

        elif (optionId == swsm.PROCESS_NEXT_CRITERIA):
            opstat = opStatus()

            fparms = get_parms_for_input(parms,
                                         swsw.get_next_criteria_input_idList)

            output_df_title = fparms[0]
            criteria = fparms[2]

            # NOTE(review): 'sequence' is never assigned in this branch —
            # this line raises NameError; likely should come from the
            # current sequence's title; confirm
            current_sequence = swsm.get_subset_sequence(sequence)
            sequence_title = current_sequence.get_sequence_title()

            try:
                current_df = swsm.get_current_subset_df()
                exec(criteria + swsm.starting_criteria_postamble)
                current_df = swsm.get_current_subset_df()
            except Exception as e:
                opstat.store_exception("Error running subset sequence '" + sequence_title + "'", e)
                # NOTE(review): 'current_step' is also undefined in this
                # branch — the redisplay below would raise NameError; confirm
                current_df_title = current_step.get_input_subset_df_title()
                current_df = swsm.get_current_subset_df()
                criteria = current_step.get_criteria()
                output_df_title = current_step.get_output_subset_df_title()
                swsw.display_next_criteria(current_df_title, current_df,
                                           criteria, output_df_title)
                display_exception(opstat)

            if (opstat.get_status()):
                swsm.set_current_subset_df(current_df)
                swsm.set_current_subset_step_id(
                    swsm.get_current_subset_step_id() + 1)

                # either all steps are done (offer save) or advance to the
                # next step in the sequence
                if (swsm.get_current_subset_step_id() >=
                        swsm.get_current_subset_sequence().get_total_sequence_steps()):
                    swsw.display_sequence_save_subset(
                        output_df_title, swsm.get_current_subset_df())
                else:
                    current_step = swsm.get_current_subset_sequence().get_sequence_step(
                        swsm.get_current_subset_step_id())
                    current_df_title = current_step.get_input_subset_df_title()
                    current_columns = current_step.get_col_names_list()
                    columns_action = current_step.get_keep_drop_flag()

                    swsw.display_manual_df_subset_setup(
                        swsm.get_current_subset_sequence().get_sequence_title(),
                        current_df_title, current_columns, columns_action,
                        swsm.get_current_subset_step_id())

        elif (optionId == swsm.DISPLAY_NEW_STEP):
            current_sequence = swsm.get_current_subset_sequence()
            sequence_title = current_sequence.get_sequence_title()

            # seed a fresh step whose input is the previous step's output
            current_step = swsm.get_current_subset_step()
            df_title = current_step.get_output_subset_df_title()

            current_df = swsm.get_current_subset_df()
            current_columns = []
            current_action = "Keep"
            criteria = swsm.starting_criteria
            output_df_title = ""

            current_step = swsm.dfc_subset_step(df_title, current_columns,
                                                current_action, criteria,
                                                output_df_title)
            swsm.set_current_subset_step(current_step)

            swsw.display_manual_df_subset_setup(
                sequence_title, df_title, current_columns, current_action,
                swsm.get_current_subset_step_id())
            #swsw.display_next_criteria(df_title,current_df,criteria,output_df_title)

        elif (optionId == swsm.PROCESS_SAVE_SAVED_SUBSET):
            save_subset_run(parms, 1)

        elif (optionId == swsm.DISPLAY_GET_REMOTE_SUBSET):
            # called from another chapter : adopt that chapter's current
            # dataframe as the subset dataframe and restart the setup
            chapterid = parms[0]

            new_config_df = None

            if (chapterid == cfg.DataInspection_ID):
                new_config_df = cfg.get_config_value(cfg.CURRENT_INSPECTION_DF)
            elif (chapterid == cfg.DataCleansing_ID):
                new_config_df = cfg.get_config_value(cfg.CURRENT_CLEANSE_DF)
            elif (chapterid == cfg.DataTransform_ID):
                new_config_df = cfg.get_config_value(cfg.CURRENT_TRANSFORM_DF)
            elif (chapterid == cfg.DataExport_ID):
                new_config_df = cfg.get_config_value(cfg.CURRENT_EXPORT_DF)
            elif (chapterid == cfg.DataImport_ID):
                new_config_df = cfg.get_config_value(cfg.CURRENT_IMPORT_DF)
            elif (chapterid == cfg.SWGeocodeUtility_ID):
                new_config_df = cfg.get_config_value(cfg.CURRENT_GEOCODE_DF)
            elif (chapterid == cfg.SWDFSubsetUtility_ID):
                new_config_df = cfg.get_config_value(cfg.CURRENT_SUBSET_DF)

            cfg.set_config_value(cfg.CURRENT_SUBSET_DF, new_config_df)

            swsm.clear_current_subset_data()
            swsw.display_df_subset_setup()

    else:
        # no dataframe loaded : reset the utility and tell the user
        swsw.get_dfsubset_main_taskbar()
        cfg.drop_config_value(cfg.CURRENT_SUBSET_DF)
        clear_sw_utility_dfsubsetdata()
        cfg.display_data_select_df(cfg.SWDFSubsetUtility_ID)

        if (not (optionId == swsm.DISPLAY_MAIN)):
            cfg.display_no_dfs(cfg.SWDFSubsetUtility_ID)
def save_subset_run(parms,saveid) :
    """
    * --------------------------------------------------------------------
    * function : save the current subset df and/or the subset sequence
    *
    * parms :
    *   parms   - input form parms
    *   saveid  - 0 : save from the subset form
    *             1 : save from the saved-sequence form (adds a
    *                 "drop original sequence" flag as a 4th field)
    *
    * returns : N/A
    * --------------------------------------------------------------------
    """

    opstat = opStatus()

    # the two forms share the first three fields (dfc df title, csv file
    # name, sequence name); the saved-sequence form adds a drop flag
    if(saveid == 0) :
        fparms = get_parms_for_input(parms,swsw.get_subset_save_input_idList)
    else :
        fparms = get_parms_for_input(parms,swsw.get_saved_save_input_idList)

    dfc_df_title        = fparms[0]
    csv_file_name       = fparms[1]
    save_subset_name    = fparms[2]

    if(saveid == 1) :
        # form delivers the flag as a string
        drop_original = fparms[3]
        if(drop_original == "False") :
            drop_original = False
        else :
            drop_original = True
    else :
        drop_original = False

    # each save target is independent and best-effort; failures are
    # accumulated in opstat and reported at the end
    if(len(dfc_df_title) > 0) :
        try :
            new_dfc_df = cfg.dfc_dataframe(dfc_df_title,swsm.get_current_subset_df(),
                                           "df from dataframe subset utility")
            cfg.add_dfc_dataframe(new_dfc_df)
        except Exception as e:
            opstat.store_exception("Error saving subset df as dfc df " + dfc_df_title,e)

    if(len(csv_file_name) > 0) :
        try :
            swsm.get_current_subset_df().to_csv(csv_file_name,index=False)
        except Exception as e:
            opstat.store_exception("Error saving subset df as csv file " + csv_file_name,e)

    if(len(save_subset_name) > 0) :
        try :
            swsm.add_subset_sequence(save_subset_name)
        except Exception as e:
            opstat.store_exception("Error saving subset sequence " + save_subset_name,e)

    if(drop_original) :
        current_sequence = swsm.get_current_subset_sequence()
        sequence_title = current_sequence.get_sequence_title()
        swsm.drop_subset_sequence(sequence_title)

    swsw.get_dfsubset_main_taskbar()
    cfg.display_data_select_df(cfg.SWDFSubsetUtility_ID)
    print("\n")

    if(opstat.get_status()) :

        status_msg = ""

        if(len(dfc_df_title) > 0) :
            status_msg = status_msg + ("Final Subset df saved as dfc df : " + dfc_df_title + "<br><br>")
        if(len(csv_file_name) > 0) :
            # bug fix : the df is written with to_csv above - report it as a
            # csv file (previously reported as "excel file")
            status_msg = status_msg + ("Final Subset df saved as csv file : " + csv_file_name + "<br><br>")
        if(len(save_subset_name) > 0) :
            status_msg = status_msg + ("Subset sequence saved as : " + save_subset_name + "<br>")
        if(drop_original) :
            # sequence_title is only set when drop_original was requested
            status_msg = status_msg + ("Original Subset sequence dropped : " + sequence_title + "<br>")

        if(len(status_msg) > 0) :
            display_status(status_msg)

    else :
        display_exception(opstat)

    swsm.clear_current_subset_data()

    if(DEBUG_SUBSET) :
        print("PROCESS_SAVE_SUBSET",parms,fparms)
        print("dfc_df_title",dfc_df_title)
        print("csv_file_name",csv_file_name)
        print("save_subset_name",save_subset_name)
        swsm.dump_current_step()
        swsm.dump_current_sequence()
def display_dict_maint(keyValue=None, loadfile=None):
    """
    * ------------------------------------------------------------------------
    * function : display the user dicts maintenance form
    *
    * parms :
    *   keyValue - dict name to preselect (None selects the first user dict)
    *   loadfile - json file to load a dict from (None for in-form entry)
    *
    * -------------------------------------------------------------------------
    """

    opstat = opStatus()

    # the file-load variant of the form uses its own widget id lists
    if (loadfile is None):
        dict_maint_input_form = InputForm(
            maint_dict_utility_input_id, maint_dict_utility_input_idList,
            maint_dict_utility_input_labelList, maint_dict_utility_input_typeList,
            maint_dict_utility_input_placeholderList, maint_dict_utility_input_jsList,
            maint_dict_utility_input_reqList)
    else:
        dict_maint_input_form = InputForm(
            maint_dict_file_utility_input_id, maint_dict_file_utility_input_idList,
            maint_dict_file_utility_input_labelList, maint_dict_file_utility_input_typeList,
            maint_dict_file_utility_input_placeholderList, maint_dict_file_utility_input_jsList,
            maint_dict_file_utility_input_reqList)

    selectDicts = []

    from dfcleanser.sw_utilities.sw_utility_model import get_dicts_names, USER_CREATED
    dict_names = get_dicts_names(USER_CREATED)

    if (not (dict_names is None)):

        if (keyValue is None):
            def_dict = dict_names[0]
        else:
            def_dict = keyValue

        seldict = swum.get_Dict(def_dict, USER_CREATED)
        keys = list(seldict.keys())

        # the two large code dicts are displayed sorted by key
        if ((def_dict == "Country_Codes") or (def_dict == "Language_Codes")):
            keys.sort()

        seldict = swum.get_pretty_dict(seldict, keys)

    else:
        # no user dicts yet - show placeholder entries
        dict_names = ["No User dicts defined"]
        def_dict = "No User dicts defined"
        seldict = "User defined dict"

    dictssel = {
        "default": def_dict,
        "list": dict_names,
        "callback": "select_dict"
    }
    selectDicts.append(dictssel)

    from dfcleanser.common.common_utils import get_select_defaults
    get_select_defaults(dict_maint_input_form, maint_dict_utility_input_id,
                        maint_dict_utility_input_idList,
                        maint_dict_utility_input_typeList, selectDicts)

    dict_maint_input_form.set_gridwidth(700)

    if (loadfile is None):
        dict_maint_input_form.set_buttonstyle({
            "font-size": 13, "height": 75, "width": 90, "left-margin": 20
        })
    else:
        dict_maint_input_form.set_buttonstyle({
            "font-size": 13, "height": 75, "width": 90, "left-margin": 205
        })

    dict_maint_input_form.set_fullparms(True)

    # clear any stale form state before (re)populating it
    cfg.drop_config_value(maint_dict_utility_input_id + "Parms")
    cfg.drop_config_value(maint_dict_utility_input_id + "ParmsProtect")

    if (not (loadfile is None)):

        import json

        try:
            with open(loadfile, 'r') as ds_file:
                ds = json.load(ds_file)

            keys = list(ds.keys())
            seldict = swum.get_pretty_dict(ds, keys)

        except Exception as e:
            # bug fix : flag the failure explicitly so the form is not built
            # from an unloaded dict (the list variant already sets status)
            opstat.set_status(False)
            opstat.set_errorMsg("invalid user file to load " + loadfile)
            opstat.set_exception(e)

    if (opstat.get_status()):

        if (loadfile is None):
            cfg.set_config_value(maint_dict_utility_input_id + "Parms",
                                 [def_dict, "", seldict, ""])
        else:
            cfg.set_config_value(maint_dict_utility_input_id + "Parms",
                                 [def_dict, "", seldict, loadfile])

        # dict name / values / file fields are read-only in the form
        cfg.set_config_value(maint_dict_utility_input_id + "ParmsProtect",
                             [True, False, True, True])

        help_note = "To add a user dict enter parms and values above and click on 'Add User Dict'.</br>To update the current dict change values and click on 'Update User Dict'"

        from dfcleanser.common.common_utils import get_help_note_html
        dict_maint_notes_html = get_help_note_html(help_note, 80, 75, None)

        dict_maint_html = dict_maint_input_form.get_html()

        dict_maint_title_html = "<div>User Dicts</div><br></br>"

        gridclasses = [
            "dfcleanser-common-grid-header", "dfc-bottom", "dfc-footer"
        ]
        gridhtmls = [
            dict_maint_title_html, dict_maint_html, dict_maint_notes_html
        ]

        print("\n")
        display_generic_grid("sw-utils-listdict-wrapper", gridclasses,
                             gridhtmls)

    else:
        display_exception(opstat)
        add_error_to_log("[Get User Dict from File] " + loadfile +
                         str(sys.exc_info()[0].__name__))
def process_export_form(formid, parms, display=True):
    """
    * --------------------------------------------------------------------
    * function : process export function
    *
    * parms :
    *   formid  - export form id (dem.* export constants)
    *   parms   - export parms from the input form
    *   display - display flag; when False the export runs headless
    *
    * returns : N/A
    * --------------------------------------------------------
    """

    # make sure the export chapter's input widgets are registered
    from dfcleanser.common.html_widgets import define_inputs, are_owner_inputs_defined
    if (not (are_owner_inputs_defined(cfg.DataExport_ID))):
        define_inputs(cfg.DataExport_ID, dew.dataexport_inputs)

    # nothing to export without a loaded dataframe
    # NOTE(review): "Loadad" typo is user-visible but left unchanged here
    if (not (cfg.is_a_dfc_dataframe_loaded())):
        print("No Dataframe Currently Loadad")
        return ()

    if (formid == dem.EXPORT_DF_FROM_CENSUS):
        # census df export: point the export chapter at the census df
        opstat = opStatus()
        dfid = parms + "_df"
        cfg.set_config_value(cfg.CURRENT_EXPORT_DF, dfid)
        display_export_forms(0)

    elif (formid == dem.EXPORT_TO_DB_FROM_CENSUS):
        # census db export: point the export chapter at the census df
        opstat = opStatus()
        dfid = parms + "_df"
        cfg.set_config_value(cfg.CURRENT_EXPORT_DF, dfid)
        dew.display_dc_export_forms(2, 4)

    elif ((formid == dem.CSV_EXPORT) or (formid == dem.EXCEL_EXPORT)
          or (formid == dem.JSON_EXPORT) or (formid == dem.HTML_EXPORT)
          or (formid == dem.CUSTOM_EXPORT)):

        opstat = opStatus()

        if (display):
            # NOTE(review): grouping of these statements under the display
            # guard is assumed from the matching 'if (display): clock.stop()'
            # below - confirm against the original file layout
            clear_output()
            dew.display_export_main_taskbar()
            save_data_export_start()
            clock = RunningClock()
            clock.start()

        # dispatch to the pandas exporter for the selected format; each
        # branch also sets the parms title/labels used for the summary
        if (formid == dem.CSV_EXPORT):
            fparms = dew.get_csv_export_inputs(parms)
            opstat = export_pandas_csv(fparms, dew.pandas_export_csv_id,
                                       dew.pandas_export_csv_labelList)
            parmstitle = "Pandas CSV Export Parms"
            parmslist = dew.pandas_export_csv_labelList[:6]

        elif (formid == dem.EXCEL_EXPORT):
            fparms = dew.get_excel_export_inputs(parms)
            opstat = export_pandas_excel(fparms, dew.pandas_export_excel_id,
                                         dew.pandas_export_excel_labelList)
            parmstitle = "Pandas Excel Export Parms"
            parmslist = dew.pandas_export_excel_labelList[:7]

        elif (formid == dem.JSON_EXPORT):
            fparms = dew.get_json_export_inputs(parms)
            opstat = export_pandas_json(fparms, dew.pandas_export_json_id,
                                        dew.pandas_export_json_labelList)
            parmstitle = "Pandas JSON Export Parms"
            parmslist = dew.pandas_export_json_labelList[:6]

        elif (formid == dem.HTML_EXPORT):
            fparms = dew.get_html_export_inputs(parms)
            opstat = export_pandas_html(fparms, dew.pandas_export_html_id,
                                        dew.pandas_export_html_labelList)
            parmstitle = "Pandas HTML Export Parms"
            parmslist = dew.pandas_export_html_labelList[:8]

        elif (formid == dem.CUSTOM_EXPORT):
            # custom export returns a flag saying whether to show stats
            (dispstats, opstat) = export_custom(parms)
            if (dispstats):
                parmstitle = "Custom Export Parms"
                parmslist = dew.custom_export_labelList[:4]

        if (opstat.get_status()):
            if (display):
                if (formid == dem.CUSTOM_EXPORT):
                    if (dispstats):
                        ciparms = parms[0].replace("\n", "</br>")
                        # NOTE(review): fparms is never assigned in the
                        # CUSTOM_EXPORT branch, so fparms[1] here would raise
                        # NameError - confirm intended source of this value
                        display_data_export_parms(parmstitle, parmslist,
                                                  [ciparms],
                                                  cfg.DataExport_ID,
                                                  fparms[1], True)
                else:
                    display_data_export_parms(parmstitle, parmslist, fparms,
                                              cfg.DataExport_ID, fparms[1])
        else:
            display_exception(opstat)

        if (display):
            clock.stop()

    elif (formid == dem.SQLTABLE_EXPORT):
        # sql exports are handled by their own multi-step flow
        export_sql_table(parms)

    else:
        print("Invalid formid " + str(formid))

    return
def process_df_transform(optionid,parms,display=True) :
    """
    * --------------------------------------------------------------------
    * function : process dataframe transform option
    *
    * parms :
    *   optionid - transform option (dtm.* constants)
    *   parms    - transform parms
    *   display  - display flag
    *
    * returns :
    *   N/A
    * --------------------------------------------------------
    """

    opstat = opStatus()

    # the option ids are distinct constants, so a single elif chain is
    # equivalent to the original mixed if/elif chains
    if(optionid == dtm.PROCESS_SHOW_COLUMN_NAMES_ROW) :
        dftw.display_dataframe_col_names_taskbar()
        print("\n")
        col_names_table = dcTable("Column Names ","cnamesTable",cfg.DataTransform_ID)
        col_names_table.set_table_column_parms({"font":12})
        col_names_table.set_note("None")
        display_column_names(cfg.get_current_chapter_df(cfg.DataTransform_ID),col_names_table,None)

    elif(optionid == dtm.PROCESS_SAVE_COLUMN_NAMES_ROW) :
        [opstat, filename] = save_column_names_row(parms)
        dftw.display_dataframe_col_names_taskbar()
        if(opstat.get_status()) :
            display_status_note("Column Names Row Saved Successfully to : " + filename)
            clear_dataframe_transform_cfg_values()
        else :
            display_exception(opstat)

    # add column names row
    elif(optionid == dtm.PROCESS_ADD_COLUMN_NAMES_ROW) :
        opstat = add_column_names_row(parms)
        dftw.display_dataframe_col_names_taskbar()
        print("\n")
        if(opstat.get_status()) :
            clear_dataframe_transform_cfg_values()
            display_status_note("Column Names Row Added Successfully")
            col_names_table = dcTable("Column Names ","cnamesTable",cfg.DataTransform_ID)
            col_names_table.set_table_column_parms({"font":12})
            col_names_table.set_note("None")
            # bug fix : pass the chapter id (DataTransform_ID) like every
            # other branch, not the CURRENT_TRANSFORM_DF config key
            display_column_names(cfg.get_current_chapter_df(cfg.DataTransform_ID),col_names_table,None)
        else :
            display_main_option([[0,0]])
            display_exception(opstat)

    elif(optionid == dtm.PROCESS_CHANGE_COLUMN_NAMES) :
        opstat = change_column_names(parms)
        dftw.display_dataframe_col_names_taskbar()
        print("\n")
        if(opstat.get_status()) :
            clear_dataframe_transform_cfg_values()
            display_status_note("Column Names Changed Successfully")
            col_names_table = dcTable("Column Names ","cnamesTable",cfg.DataTransform_ID)
            col_names_table.set_table_column_parms({"font":12})
            col_names_table.set_note("None")
            # bug fix : chapter id, not the CURRENT_TRANSFORM_DF config key
            display_column_names(cfg.get_current_chapter_df(cfg.DataTransform_ID),col_names_table,None)
        else :
            display_exception(opstat)

    elif(optionid == dtm.PROCESS_DROP_COLUMN_NAMES_ROW) :
        opstat = drop_column_names_row()
        dftw.display_dataframe_col_names_taskbar()
        print("\n")
        if(opstat.get_status()) :
            display_status_note("Column Names Row Dropped Successfully")
            clear_dataframe_transform_cfg_values()
        else :
            display_exception(opstat)

    elif(optionid == dtm.PROCESS_WHITESPACE_COLUMN_NAMES) :
        opstat = remwhitespace_column_names_row(parms)
        dftw.display_dataframe_col_names_taskbar()
        print("\n")
        if(opstat.get_status()) :
            display_status_note("Column Names Whitespace Removed Successfully")
            clear_dataframe_transform_cfg_values()
        else :
            display_exception(opstat)

    elif(optionid == dtm.PROCESS_SET_DF_INDEX) :
        opstat = set_df_index(parms)
        dftw.display_dataframe_indices_taskbar()
        print("\n")
        if(opstat.get_status()) :
            clear_dataframe_transform_cfg_values()
            display_status_note("df Index Set Successfully")
        else :
            display_exception(opstat)
        dftw.display_current_df_index(cfg.get_current_chapter_df(cfg.DataTransform_ID),
                                      cfg.get_current_chapter_dfc_df_title(cfg.DataTransform_ID))
        dftw.display_remote_df(cfg.DataTransform_ID)

    elif(optionid == dtm.PROCESS_RESET_DF_INDEX) :
        opstat = reset_df_index(parms)
        dftw.display_dataframe_indices_taskbar()
        print("\n")
        if(opstat.get_status()) :
            clear_dataframe_transform_cfg_values()
            display_status_note("df Index Reset Successfully")
        else :
            display_exception(opstat)
        dftw.display_current_df_index(cfg.get_current_chapter_df(cfg.DataTransform_ID),
                                      cfg.get_current_chapter_dfc_df_title(cfg.DataTransform_ID))
        dftw.display_remote_df(cfg.DataTransform_ID)

    elif(optionid == dtm.PROCESS_APPEND_TO_INDEX) :
        opstat = append_to_df_index(parms)
        dftw.display_dataframe_indices_taskbar()
        print("\n")
        if(opstat.get_status()) :
            clear_dataframe_transform_cfg_values()
            display_status_note("df Index Appended to Successfully")
        else :
            dftw.display_dataframe_options([[4,0]])
            display_exception(opstat)
        dftw.display_current_df_index(cfg.get_current_chapter_df(cfg.DataTransform_ID),
                                      cfg.get_current_chapter_dfc_df_title(cfg.DataTransform_ID))
        dftw.display_remote_df(cfg.DataTransform_ID)

    elif(optionid == dtm.PROCESS_SORT_DF_INDEX) :
        opstat = sort_df_index(parms)
        dftw.display_dataframe_indices_taskbar()
        print("\n")
        if(opstat.get_status()) :
            clear_dataframe_transform_cfg_values()
            display_status_note("Dataframe Sorted by index Successfully")
        else :
            display_exception(opstat)
        dftw.display_current_df_index(cfg.get_current_chapter_df(cfg.DataTransform_ID),
                                      cfg.get_current_chapter_dfc_df_title(cfg.DataTransform_ID))
        dftw.display_remote_df(cfg.DataTransform_ID)

    # sort by column
    elif(optionid == dtm.PROCESS_SORT_COLUMN) :
        opstat = process_sort_by_column(parms,display)
        dftw.display_dataframe_transform_main()
        print("\n")
        if(opstat.get_status()) :
            clear_dataframe_transform_cfg_values()
            display_status_note(opstat.get_errorMsg())
        else :
            display_main_option([[0,0]])
            display_exception(opstat)

    # drop duplicate rows
    elif(optionid == dtm.PROCESS_DROP_DUPLICATE_ROWS) :
        start_rows = len(cfg.get_current_chapter_df(cfg.DataTransform_ID))
        opstat = drop_duplicate_rows(parms,display)
        # bug fix : re-fetch the df after the drop - measuring the stale
        # pre-drop reference always reported 0 rows dropped
        end_rows = len(cfg.get_current_chapter_df(cfg.DataTransform_ID))
        dftw.display_dataframe_transform_main()
        print("\n")
        if(opstat.get_status()) :
            clear_dataframe_transform_cfg_values()
            display_status_note(str(start_rows-end_rows) + " Duplicate Rows Dropped Successfully")
        else :
            display_exception(opstat)

    # return
    elif(optionid == dtm.DF_TRANSFORM_RETURN) :
        dftw.display_dataframe_transform_main()

    # help
    elif(optionid == dtm.DF_TRANSFORM_HELP) :
        print("help")
def display_dc_data_scripting(optionId, parms=None):
    """
    * --------------------------------------------------------------------
    * function : display the data scripting taskbar and process a
    *            scripting option
    *
    * parms :
    *   optionId - scripting chapter option
    *   parms    - option parms; None just (re)displays the taskbar
    *
    * returns : N/A
    * --------------------------------------------------------
    """

    from IPython.display import clear_output
    clear_output()

    # dfcleanser not initialized : show the taskbar, reset state and bail
    from dfcleanser.common.cfg import check_if_dc_init
    if (not check_if_dc_init()):
        from dfcleanser.common.display_utils import display_dfcleanser_taskbar
        display_dfcleanser_taskbar(
            ButtonGroupForm(dc_script_tb_id, dc_script_tb_keyTitleList,
                            dc_script_tb_jsList, dc_script_tb_centered), False)
        from dfcleanser.scripting.data_scripting_control import clear_data_scripting_data
        clear_data_scripting_data()
        return

    from dfcleanser.common.display_utils import display_dfcleanser_taskbar
    display_dfcleanser_taskbar(
        ButtonGroupForm(dc_script_tb_id, dc_script_tb_keyTitleList,
                        dc_script_tb_jsList, dc_script_tb_centered), False)

    # idiom fix : compare against None with 'is', not '=='
    if (parms is None):
        from dfcleanser.scripting.data_scripting_control import clear_data_scripting_data
        clear_data_scripting_data()
    else:
        funcid = int(parms[0])

        if (funcid == dsm.TURN_ON_SCRIPTING):
            from dfcleanser.scripting.data_scripting_control import set_scripting_status
            set_scripting_status(True)
            from dfcleanser.common.common_utils import display_status_note
            display_status_note("Scripting is turned on")

        elif (funcid == dsm.TURN_OFF_SCRIPTING):
            from dfcleanser.scripting.data_scripting_control import set_scripting_status
            set_scripting_status(False)
            from dfcleanser.common.common_utils import display_status_note
            display_status_note("Scripting is turned off")

        elif (funcid == dsm.SHOW_CURRENT_SCRIPT):
            display_scripting_forms()

        elif (funcid == dsm.ADD_TO_CURRENT_SCRIPT):
            # build the add-code input form and render it in the grid
            script_form = InputForm(
                dc_add_code_input_id, dc_add_code_input_idList,
                dc_add_code_input_labelList, dc_add_code_input_typeList,
                dc_add_code_input_placeholderList, dc_add_code_input_jsList,
                dc_add_code_input_reqList)

            script_form.set_shortForm(True)
            script_form.set_buttonstyle({
                "font-size": 12, "height": 75, "width": 140, "left-margin": 70
            })
            script_form.set_gridwidth(880)
            script_form.set_fullparms(True)

            script_form_html = script_form.get_html()
            script_title_html = "<div>Scripting</div><br>"

            gridclasses = ["dfcleanser-common-grid-header", "dfc-footer"]
            gridhtmls = [script_title_html, script_form_html]

            from dfcleanser.common.common_utils import display_generic_grid
            display_generic_grid("data-scripting-wrapper", gridclasses,
                                 gridhtmls)

        elif (funcid == dsm.ADD_CODE_SCRIPT):
            from dfcleanser.scripting.data_scripting_control import add_code_to_script
            add_code_to_script(parms)
            display_status("Code added to Current Script succesfully ")

        elif (funcid == dsm.DELETE_CURRENT_SCRIPT):
            from dfcleanser.scripting.data_scripting_control import drop_current_script
            drop_current_script()
            display_status("Current Script Cleared succesfully ")

        elif (funcid == dsm.LOAD_BACKUP_SCRIPT):
            from dfcleanser.scripting.data_scripting_control import load_backup_scriptlog_to_current
            load_backup_scriptlog_to_current()
            display_scripting_forms()
            display_status("Current Script Loaded from Backup ")

        elif (funcid == dsm.SAVE_BACKUP_SCRIPT):
            # pull the edited code out of the form and persist it
            codeparms = get_parms_for_input(parms[1], dc_script_input_idList)
            save_code = get_code_from_form(codeparms)

            from dfcleanser.scripting.data_scripting_control import (
                set_current_scriptlog, save_current_scriptlog_to_backup)
            set_current_scriptlog(save_code)
            save_current_scriptlog_to_backup(save_code)

            display_scripting_forms()
            display_status("Current Script Backed up successfully ")

        elif (funcid == dsm.RUN_CURRENT_SCRIPT):
            opstat = opStatus()
            from dfcleanser.scripting.data_scripting_control import run_scriptlog
            run_scriptlog(parms, opstat)
            if (opstat.get_status()):
                display_status("Current Script Run successfully ")
            else:
                display_exception(opstat)
def process_sw_utilities(optionId, parms=None):
    """
    * --------------------------------------------------------------------
    * function : process a sw utilities chapter option (user lists and
    *            user dicts display / maintenance)
    *
    * parms :
    *   optionId - sw utilities option (swum.* constants)
    *   parms    - option parms from the input form
    *
    * returns : N/A
    * --------------------------------------------------------
    """

    from IPython.display import clear_output
    clear_output()

    from dfcleanser.common.html_widgets import define_inputs, are_owner_inputs_defined
    if (not (are_owner_inputs_defined(cfg.SWUtilities_ID))):
        define_inputs(cfg.SWUtilities_ID, swuw.SWUtility_inputs)

    # dfcleanser not initialized : show the taskbar, reset state and bail
    if (not cfg.check_if_dc_init()):
        swuw.get_sw_utilities_main_taskbar()
        clear_sw_utility_data()
        return

    opstat = opStatus()

    if (optionId == swum.MAIN_OPTION):
        swuw.get_sw_utilities_main_taskbar()
        clear_sw_utility_data()
        return

    if (optionId == swum.LIST_OPTION):
        swuw.get_sw_utilities_main_taskbar()
        swuw.display_list_dict(swum.LIST_ID)
        return

    elif (optionId == swum.DICT_OPTION):
        swuw.get_sw_utilities_main_taskbar()
        swuw.display_list_dict(swum.DICT_ID)
        return

    elif (optionId == swum.FUNCS_OPTION):
        swuw.get_sw_utilities_main_taskbar()
        swuw.display_funcs()
        return

    elif (optionId == swum.MAINT_LIST_OPTION):
        swuw.get_sw_utilities_main_taskbar()
        swuw.display_list_maint()
        return

    elif (optionId == swum.MAINT_DICT_OPTION):
        swuw.get_sw_utilities_main_taskbar()
        swuw.display_dict_maint()
        return

    elif (optionId == swum.SELECT_LIST_OPTION):
        swuw.get_sw_utilities_main_taskbar()
        swuw.display_list_dict(swum.LIST_ID, parms[0])
        return

    elif (optionId == swum.SELECT_DICT_OPTION):
        swuw.get_sw_utilities_main_taskbar()
        swuw.display_list_dict(swum.DICT_ID, parms[0])
        return

    elif (optionId == swum.UPDATE_LIST_OPTION):
        swuw.get_sw_utilities_main_taskbar()

        fparms = swuw.get_sw_utilities_list_inputs(parms)
        listname = fparms[0]
        listvalues = fparms[2]

        try:
            newlist = json.loads(listvalues)
            swum.update_List(listname, newlist, swum.USER_CREATED)
        except Exception as e:
            opstat.store_exception("user list is invalid ", e)

        if (opstat.get_status()):
            swuw.display_list_dict(swum.LIST_ID)
        else:
            display_exception(opstat)
        return

    elif (optionId == swum.CLEAR_LIST_OPTION):
        swuw.get_sw_utilities_main_taskbar()
        swuw.display_list_maint()
        return

    if (optionId == swum.ADD_LIST_OPTION):
        swuw.get_sw_utilities_main_taskbar()

        fparms = swuw.get_sw_utilities_list_inputs(parms)
        newlistname = fparms[1]
        newlistvalues = fparms[2]
        newlistfname = fparms[3]

        if (len(newlistname) > 0):
            if (len(newlistvalues) > 0):
                if (len(newlistfname) > 0):
                    # a file name takes precedence over inline values
                    swum.add_List(newlistname, None, swum.USER_CREATED,
                                  newlistfname)
                else:
                    try:
                        newlist = json.loads(newlistvalues)
                        # bug fix : add the new user LIST with add_List -
                        # this branch previously called swum.add_Dict
                        swum.add_List(newlistname, newlist,
                                      swum.USER_CREATED, None)
                    except Exception as e:
                        opstat.store_exception("user list is invalid ", e)
            else:
                opstat.set_status(False)
                opstat.set_errorMsg("No list values defined")
        else:
            opstat.set_status(False)
            opstat.set_errorMsg("No list Name Specified")

        if (opstat.get_status()):
            swuw.display_list_dict(swum.LIST_ID)
        else:
            display_exception(opstat)
        return

    elif (optionId == swum.UPDATE_DICT_OPTION):
        swuw.get_sw_utilities_main_taskbar()

        fparms = swuw.get_sw_utilities_dict_inputs(parms)
        dictname = fparms[0]
        dictvalues = fparms[2]

        try:
            newdict = json.loads(dictvalues)
            swum.update_Dict(dictname, newdict, swum.USER_CREATED)
        except Exception as e:
            opstat.store_exception("user dict is invalid ", e)

        if (opstat.get_status()):
            swuw.display_dict_maint(dictname, None)
        else:
            display_exception(opstat)
        return

    elif (optionId == swum.CLEAR_DICT_OPTION):
        swuw.get_sw_utilities_main_taskbar()
        swuw.display_dict_maint()
        return

    if (optionId == swum.ADD_DICT_OPTION):
        swuw.get_sw_utilities_main_taskbar()

        fparms = swuw.get_sw_utilities_dict_inputs(parms)
        newdictname = fparms[1]
        newdictvalues = fparms[2]
        newdictfname = fparms[3]

        if (len(newdictname) > 0):
            if (len(newdictvalues) > 0):
                if (len(newdictfname) > 0):
                    # a file name takes precedence over inline values
                    swum.add_Dict(newdictname, None, swum.USER_CREATED,
                                  newdictfname)
                else:
                    try:
                        newdict = json.loads(newdictvalues)
                        swum.add_Dict(newdictname, newdict,
                                      swum.USER_CREATED, None)
                    except Exception as e:
                        opstat.store_exception("user dict is invalid ", e)
            else:
                opstat.set_status(False)
                opstat.set_errorMsg("No dict values defined")
        else:
            opstat.set_status(False)
            opstat.set_errorMsg("No Dict Name Specified")

        if (opstat.get_status()):
            swuw.display_list_dict(swum.DICT_ID)
        else:
            display_exception(opstat)
        return

    if (optionId == swum.LOAD_LIST_OPTION):
        swuw.get_sw_utilities_main_taskbar()
        fparms = get_parms_for_input(parms,
                                     swuw.maint_list_utility_input_idList)
        swuw.display_list_maint(None, fparms[3])
        return

    if (optionId == swum.DELETE_LIST_OPTION):
        swuw.get_sw_utilities_main_taskbar()

        fparms = get_parms_for_input(parms,
                                     swuw.maint_list_utility_input_idList)
        listname = fparms[0]

        opstat = opStatus()

        # system reserved names can not be deleted
        for i in range(len(swum.ReservedDicts)):
            if (listname == swum.ReservedDicts[i]):
                opstat.set_status(False)
                opstat.set_errorMsg("List to delete is a system reserved dict")

        if (opstat.get_status()):
            if (len(listname) == 0):
                opstat.set_status(False)
                opstat.set_errorMsg("Invalid list name to delete")
            else:
                swum.delete_List(listname)
                swuw.display_list_maint()

        if (not (opstat.get_status())):
            display_exception(opstat)
        return

    if (optionId == swum.LOAD_DICT_OPTION):
        swuw.get_sw_utilities_main_taskbar()
        fparms = get_parms_for_input(parms,
                                     swuw.maint_dict_utility_input_idList)
        swuw.display_dict_maint(None, fparms[3])
        return

    if (optionId == swum.DELETE_DICT_OPTION):
        swuw.get_sw_utilities_main_taskbar()

        fparms = get_parms_for_input(parms,
                                     swuw.maint_dict_utility_input_idList)
        dictname = fparms[0]

        opstat = opStatus()

        # system reserved names can not be deleted
        for i in range(len(swum.ReservedDicts)):
            if (dictname == swum.ReservedDicts[i]):
                opstat.set_status(False)
                opstat.set_errorMsg("Dict to delete is a system reserved dict")

        if (opstat.get_status()):
            if (len(dictname) == 0):
                opstat.set_status(False)
                opstat.set_errorMsg("Invalid dict name to delete")
            else:
                swum.delete_Dict(dictname)
                swuw.display_dict_maint()

        if (not (opstat.get_status())):
            display_exception(opstat)
        return
def display_list_maint(keyValue=None, loadfile=None):
    """
    * ------------------------------------------------------------------------
    * function : display the user lists maintenance form
    *
    * parms :
    *   keyValue - list name to preselect (None selects the first user list)
    *   loadfile - json file to load a list from (None for in-form entry)
    *
    * -------------------------------------------------------------------------
    """

    opstat = opStatus()

    # the file-load variant of the form uses its own widget id lists
    if (loadfile is None):
        list_maint_input_form = InputForm(
            maint_list_utility_input_id, maint_list_utility_input_idList,
            maint_list_utility_input_labelList, maint_list_utility_input_typeList,
            maint_list_utility_input_placeholderList, maint_list_utility_input_jsList,
            maint_list_utility_input_reqList)
    else:
        list_maint_input_form = InputForm(
            maint_list_file_utility_input_id, maint_list_file_utility_input_idList,
            maint_list_file_utility_input_labelList, maint_list_file_utility_input_typeList,
            maint_list_file_utility_input_placeholderList, maint_list_file_utility_input_jsList,
            maint_list_file_utility_input_reqList)

    selectDicts = []

    from dfcleanser.sw_utilities.sw_utility_model import get_lists_names, USER_CREATED
    list_names = get_lists_names(USER_CREATED)

    if (not (list_names is None)):

        if (keyValue is None):
            def_list = list_names[0]
        else:
            def_list = keyValue

        sellist = swum.get_List(def_list, USER_CREATED)

        # robustness fix : join handles the empty-list case ("[]") which
        # the old manual loop left as an unterminated "["
        dsstr = "[" + ",".join(str(item) for item in sellist) + "]"

    else:
        # no user lists yet - show placeholder entries
        list_names = ["No User lists defined"]
        def_list = "No User lists defined"
        sellist = "User defined list"

    listssel = {
        "default": def_list,
        "list": list_names,
        "callback": "select_list"
    }
    selectDicts.append(listssel)

    from dfcleanser.common.common_utils import get_select_defaults
    get_select_defaults(list_maint_input_form, maint_list_utility_input_id,
                        maint_list_utility_input_idList,
                        maint_list_utility_input_typeList, selectDicts)

    list_maint_input_form.set_gridwidth(700)

    if (loadfile is None):
        list_maint_input_form.set_buttonstyle({
            "font-size": 13, "height": 75, "width": 90, "left-margin": 20
        })
    else:
        list_maint_input_form.set_buttonstyle({
            "font-size": 13, "height": 75, "width": 90, "left-margin": 205
        })

    list_maint_input_form.set_fullparms(True)

    # clear any stale form state before (re)populating it
    cfg.drop_config_value(maint_list_utility_input_id + "Parms")
    cfg.drop_config_value(maint_list_utility_input_id + "ParmsProtect")

    if (not (loadfile is None)):

        import json
        from dfcleanser.common.common_utils import does_file_exist

        if (does_file_exist(loadfile)):
            try:
                with open(loadfile, 'r') as ds_file:
                    ds = json.load(ds_file)

                # same join-based rendering as above; "[]" for an empty list
                dsstr = "[" + ",".join(str(item) for item in ds) + "]"

            except Exception as e:
                opstat.set_status(False)
                opstat.set_errorMsg("Error processing user file to load" + loadfile)
                opstat.set_exception(e)
        else:
            opstat.set_status(False)
            opstat.set_errorMsg("invalid user file to load" + loadfile)

    if (opstat.get_status()):

        if (loadfile is None):
            cfg.set_config_value(maint_list_utility_input_id + "Parms",
                                 [def_list, "", dsstr, ""])
        else:
            cfg.set_config_value(maint_list_utility_input_id + "Parms",
                                 [def_list, "", dsstr, loadfile])

        # list name / values / file fields are read-only in the form
        cfg.set_config_value(maint_list_utility_input_id + "ParmsProtect",
                             [True, False, True, True])

        help_note = "To add a user list enter parms and values above and click on 'Add User List'.</br>To update the current list change values and click on 'Update User List'"

        from dfcleanser.common.common_utils import get_help_note_html
        list_maint_notes_html = get_help_note_html(help_note, 80, 75, None)

        list_maint_html = list_maint_input_form.get_html()

        list_maint_title_html = "<div>User Lists</div><br></br>"

        gridclasses = [
            "dfcleanser-common-grid-header", "dfc-bottom", "dfc-footer"
        ]
        gridhtmls = [
            list_maint_title_html, list_maint_html, list_maint_notes_html
        ]

        print("\n")
        display_generic_grid("sw-utils-listdict-wrapper", gridclasses,
                             gridhtmls)

    else:
        display_exception(opstat)
        # bug fix : this is the user LIST loader - log it as such
        # (previously logged as "[Get User Dict from File]")
        add_error_to_log("[Get User List from File] " + loadfile +
                         str(sys.exc_info()[0].__name__))
def display_dc_pandas_export_sql_inputs(fId, dbId, dbconparms, exportparms=None):
    """
    * --------------------------------------------------------------------
    * function : display pandas sql export form
    *
    * parms :
    *   fid         - export form step : 0 show table names,
    *                 1 show column names for a selected table,
    *                 2 show the db connector parms
    *   dbid        - database id
    *   dbconparms  - db connector parms
    *   exportparms - export parms from the input form
    *
    * returns : N/A
    * --------------------------------------------------------
    """

    opstat = opStatus()
    # saved copy of the status so the form can still be rendered on error
    opstatStatus = True
    listHtml = ""

    dbid = int(dbId)
    fid = int(fId)

    fparms = None

    if (fid == 0):
        # step 0 : list the tables in the currently selected database
        dbid = cfg.get_config_value(cfg.CURRENT_DB_ID_KEY)
        from dfcleanser.data_import.data_import_widgets import (
            get_table_names, TABLE_NAMES, get_rows_html)
        tablelist = get_table_names(dbid, opstat)
        listHtml = get_rows_html(tablelist, TABLE_NAMES, True)

    elif (fid == 1):
        # step 1 : a table was selected - list its column names
        fparms = get_parms_for_input(exportparms, pandas_export_sqltable_idList)
        dbid = cfg.get_config_value(cfg.CURRENT_DB_ID_KEY)
        cfg.set_config_value(pandas_export_sqltable_id + "Parms", fparms)

        if (len(fparms[0]) > 0):
            from dfcleanser.data_import.data_import_widgets import (
                get_column_names, get_rows_html)
            from dfcleanser.data_import.data_import_model import COLUMN_NAMES
            # NOTE(review): table name appears to be fparms[1] here while
            # fparms[0] is tested above - confirm field order in the idList
            columnlist = get_column_names(dbid, fparms[1], opstat)
            listHtml = get_rows_html(columnlist, COLUMN_NAMES, True)
        else:
            opstat.set_status(False)
            opstat.set_errorMsg("No Table Selected")

    elif (fid == 2):
        # step 2 : show the db connector parms for the chosen database
        cfg.set_config_value(cfg.CURRENT_DB_ID_KEY, dbid)

        dbcondict = {}
        if (not (dbconparms == None)):
            parse_connector_parms(dbconparms, dbid, dbcondict)
        else:
            conparms = get_stored_con_Parms(dbid)
            parse_connector_parms(conparms, dbid, dbcondict)

        listHtml = get_db_connector_list(dbid, dbcondict)

    if (not (opstat.get_status())):
        # on error fall back to showing the stored connector parms; the
        # real status/errormsg are saved and status is forced True so the
        # form below still renders - the error is re-raised at the end
        dbcondict = {}
        conparms = get_stored_con_Parms(dbid)
        parse_connector_parms(conparms, dbid, dbcondict)
        listHtml = get_db_connector_list(dbid, dbcondict)

        opstatStatus = opstat.get_status()
        opstatErrormsg = opstat.get_errorMsg()
        opstat.set_status(True)

    # NOTE(review): status was just forced True above, so this condition is
    # always true at this point - kept for structural clarity
    if (opstat.get_status()):

        export_sql_input_form = InputForm(
            pandas_export_sqltable_id, pandas_export_sqltable_idList,
            pandas_export_sqltable_labelList, pandas_export_sqltable_typeList,
            pandas_export_sqltable_placeholderList,
            pandas_export_sqltable_jsList, pandas_export_sqltable_reqList)

        selectDicts = []
        df_list = cfg.get_dfc_dataframes_select_list(cfg.DataExport_ID)
        selectDicts.append(df_list)
        # pandas to_sql 'if_exists' options
        exists = {"default": "fail", "list": ["fail", "replace", "append"]}
        selectDicts.append(exists)
        index = {"default": "True", "list": ["True", "False"]}
        selectDicts.append(index)

        get_select_defaults(export_sql_input_form, pandas_export_sqltable_id,
                            pandas_export_sqltable_idList,
                            pandas_export_sqltable_typeList, selectDicts)

        export_sql_input_form.set_shortForm(False)
        export_sql_input_form.set_gridwidth(680)
        export_sql_input_form.set_custombwidth(125)
        export_sql_input_form.set_fullparms(True)

        export_sql_input_html = ""
        export_sql_input_html = export_sql_input_form.get_html()

        export_sql_heading_html = "<div>" + get_pandas_export_input_title(
            dem.SQLTABLE_EXPORT, dbid) + "</div><br>"

        # NOTE(review): fparms is only set when fid == 1; for other fids
        # with exportparms given this stores None - confirm intended
        if (not (exportparms == None)):
            cfg.set_config_value(pandas_export_sqltable_id + "Parms", fparms)

        gridclasses = [
            "dfcleanser-common-grid-header", "dfc-left", "dfc-right"
        ]
        gridhtmls = [export_sql_heading_html, listHtml, export_sql_input_html]

        display_generic_grid("data-import-sql-table-wrapper", gridclasses,
                             gridhtmls)

    if (not (opstatStatus)):
        # restore the saved error state and surface it to the user
        opstat.set_status(opstatStatus)
        opstat.set_errorMsg(opstatErrormsg)
        display_exception(opstat)