def get_dfsubset_input_parms(parms):
    """Parse the df-subset input form parms into a field-value list."""
    # NOTE(review): get_subset_input_idList is referenced unqualified here while
    # other code in this file uses swsw.get_subset_input_idList - confirm the
    # name is actually imported at module level.
    return get_parms_for_input(parms, get_subset_input_idList)
def display_dc_data_scripting(optionId, parms=None):
    """
    * ---------------------------------------------------------
    * function : main data scripting control
    *
    * parms :
    *   optionId - caller option id (the scripting function id is parms[0])
    *   parms    - associated parms
    *
    * returns :
    *   N/A
    * --------------------------------------------------------
    """
    from IPython.display import clear_output
    clear_output()

    from dfcleanser.common.display_utils import display_dfcleanser_taskbar

    def _show_scripting_taskbar():
        # the scripting taskbar is redrawn on every entry (deduped helper;
        # the original repeated this call verbatim in two places)
        display_dfcleanser_taskbar(
            ButtonGroupForm(dc_script_tb_id, dc_script_tb_keyTitleList,
                            dc_script_tb_jsList, dc_script_tb_centered), False)

    from dfcleanser.common.cfg import check_if_dc_init
    if not check_if_dc_init():
        # dfcleanser not initialized - show the taskbar, drop stale state, bail
        _show_scripting_taskbar()
        from dfcleanser.scripting.data_scripting_control import clear_data_scripting_data
        clear_data_scripting_data()
        return

    _show_scripting_taskbar()

    if parms is None:  # fixed: was 'parms == None'
        from dfcleanser.scripting.data_scripting_control import clear_data_scripting_data
        clear_data_scripting_data()
        return

    funcid = int(parms[0])

    if funcid == dsm.TURN_ON_SCRIPTING:
        from dfcleanser.scripting.data_scripting_control import set_scripting_status
        set_scripting_status(True)
        from dfcleanser.common.common_utils import display_status_note
        display_status_note("Scripting is turned on")

    elif funcid == dsm.TURN_OFF_SCRIPTING:
        from dfcleanser.scripting.data_scripting_control import set_scripting_status
        set_scripting_status(False)
        from dfcleanser.common.common_utils import display_status_note
        display_status_note("Scripting is turned off")

    elif funcid == dsm.SHOW_CURRENT_SCRIPT:
        display_scripting_forms()

    elif funcid == dsm.ADD_TO_CURRENT_SCRIPT:
        # build and display the add-code input form
        script_form = InputForm(dc_add_code_input_id, dc_add_code_input_idList,
                                dc_add_code_input_labelList, dc_add_code_input_typeList,
                                dc_add_code_input_placeholderList, dc_add_code_input_jsList,
                                dc_add_code_input_reqList)
        script_form.set_shortForm(True)
        script_form.set_buttonstyle({"font-size": 12, "height": 75,
                                     "width": 140, "left-margin": 70})
        script_form.set_gridwidth(880)
        script_form.set_fullparms(True)

        script_form_html = script_form.get_html()
        script_title_html = "<div>Scripting</div><br>"

        gridclasses = ["dfcleanser-common-grid-header", "dfc-footer"]
        gridhtmls = [script_title_html, script_form_html]

        from dfcleanser.common.common_utils import display_generic_grid
        display_generic_grid("data-scripting-wrapper", gridclasses, gridhtmls)

    elif funcid == dsm.ADD_CODE_SCRIPT:
        from dfcleanser.scripting.data_scripting_control import add_code_to_script
        add_code_to_script(parms)
        # fixed typo: was "succesfully"
        display_status("Code added to Current Script successfully ")

    elif funcid == dsm.DELETE_CURRENT_SCRIPT:
        from dfcleanser.scripting.data_scripting_control import drop_current_script
        drop_current_script()
        # fixed typo: was "succesfully"
        display_status("Current Script Cleared successfully ")

    elif funcid == dsm.LOAD_BACKUP_SCRIPT:
        from dfcleanser.scripting.data_scripting_control import load_backup_scriptlog_to_current
        load_backup_scriptlog_to_current()
        display_scripting_forms()
        display_status("Current Script Loaded from Backup ")

    elif funcid == dsm.SAVE_BACKUP_SCRIPT:
        codeparms = get_parms_for_input(parms[1], dc_script_input_idList)
        save_code = get_code_from_form(codeparms)
        from dfcleanser.scripting.data_scripting_control import (
            set_current_scriptlog, save_current_scriptlog_to_backup)
        set_current_scriptlog(save_code)
        save_current_scriptlog_to_backup(save_code)
        display_scripting_forms()
        display_status("Current Script Backed up successfully ")

    elif funcid == dsm.RUN_CURRENT_SCRIPT:
        opstat = opStatus()
        from dfcleanser.scripting.data_scripting_control import run_scriptlog
        run_scriptlog(parms, opstat)
        if opstat.get_status():
            display_status("Current Script Run successfully ")
        else:
            display_exception(opstat)
def process_sw_utilities(optionId, parms=None):
    """
    * ---------------------------------------------------------
    * function : main sw utilities (lists/dicts/funcs) control
    *
    * parms :
    *   optionId - function to run
    *   parms    - function parms
    *
    * returns :
    *   N/A
    * --------------------------------------------------------
    """
    from IPython.display import clear_output
    clear_output()

    from dfcleanser.common.html_widgets import define_inputs, are_owner_inputs_defined
    if not are_owner_inputs_defined(cfg.SWUtilities_ID):
        define_inputs(cfg.SWUtilities_ID, swuw.SWUtility_inputs)

    if not cfg.check_if_dc_init():
        swuw.get_sw_utilities_main_taskbar()
        clear_sw_utility_data()
        return

    opstat = opStatus()

    if optionId == swum.MAIN_OPTION:
        swuw.get_sw_utilities_main_taskbar()
        clear_sw_utility_data()
        return

    if optionId == swum.LIST_OPTION:
        swuw.get_sw_utilities_main_taskbar()
        swuw.display_list_dict(swum.LIST_ID)
        return

    elif optionId == swum.DICT_OPTION:
        swuw.get_sw_utilities_main_taskbar()
        swuw.display_list_dict(swum.DICT_ID)
        return

    elif optionId == swum.FUNCS_OPTION:
        swuw.get_sw_utilities_main_taskbar()
        swuw.display_funcs()
        return

    elif optionId == swum.MAINT_LIST_OPTION:
        swuw.get_sw_utilities_main_taskbar()
        swuw.display_list_maint()
        return

    elif optionId == swum.MAINT_DICT_OPTION:
        swuw.get_sw_utilities_main_taskbar()
        swuw.display_dict_maint()
        return

    elif optionId == swum.SELECT_LIST_OPTION:
        swuw.get_sw_utilities_main_taskbar()
        swuw.display_list_dict(swum.LIST_ID, parms[0])
        return

    elif optionId == swum.SELECT_DICT_OPTION:
        swuw.get_sw_utilities_main_taskbar()
        swuw.display_list_dict(swum.DICT_ID, parms[0])
        return

    elif optionId == swum.UPDATE_LIST_OPTION:
        swuw.get_sw_utilities_main_taskbar()
        fparms = swuw.get_sw_utilities_list_inputs(parms)
        listname = fparms[0]
        listvalues = fparms[2]
        try:
            newlist = json.loads(listvalues)
            swum.update_List(listname, newlist, swum.USER_CREATED)
        except Exception as e:
            opstat.store_exception("user list is invalid ", e)
        if opstat.get_status():
            swuw.display_list_dict(swum.LIST_ID)
        else:
            display_exception(opstat)
        return

    elif optionId == swum.CLEAR_LIST_OPTION:
        swuw.get_sw_utilities_main_taskbar()
        swuw.display_list_maint()
        return

    if optionId == swum.ADD_LIST_OPTION:
        swuw.get_sw_utilities_main_taskbar()
        fparms = swuw.get_sw_utilities_list_inputs(parms)
        newlistname = fparms[1]
        newlistvalues = fparms[2]
        newlistfname = fparms[3]
        if len(newlistname) > 0:
            if len(newlistvalues) > 0:
                if len(newlistfname) > 0:
                    swum.add_List(newlistname, None, swum.USER_CREATED, newlistfname)
                else:
                    try:
                        newlist = json.loads(newlistvalues)
                        # fixed: previously called swum.add_Dict here (copy/paste
                        # from the dict branch) - adding a LIST must use add_List
                        swum.add_List(newlistname, newlist, swum.USER_CREATED, None)
                    except Exception as e:
                        opstat.store_exception("user list is invalid ", e)
            else:
                opstat.set_status(False)
                opstat.set_errorMsg("No list values defined")
        else:
            opstat.set_status(False)
            opstat.set_errorMsg("No list Name Specified")
        if opstat.get_status():
            swuw.display_list_dict(swum.LIST_ID)
        else:
            display_exception(opstat)
        return

    elif optionId == swum.UPDATE_DICT_OPTION:
        swuw.get_sw_utilities_main_taskbar()
        fparms = swuw.get_sw_utilities_dict_inputs(parms)
        dictname = fparms[0]
        dictvalues = fparms[2]
        try:
            newdict = json.loads(dictvalues)
            swum.update_Dict(dictname, newdict, swum.USER_CREATED)
        except Exception as e:
            opstat.store_exception("user dict is invalid ", e)
        if opstat.get_status():
            swuw.display_dict_maint(dictname, None)
        else:
            display_exception(opstat)
        return

    elif optionId == swum.CLEAR_DICT_OPTION:
        swuw.get_sw_utilities_main_taskbar()
        swuw.display_dict_maint()
        return

    if optionId == swum.ADD_DICT_OPTION:
        swuw.get_sw_utilities_main_taskbar()
        fparms = swuw.get_sw_utilities_dict_inputs(parms)
        newdictname = fparms[1]
        newdictvalues = fparms[2]
        newdictfname = fparms[3]
        if len(newdictname) > 0:
            if len(newdictvalues) > 0:
                if len(newdictfname) > 0:
                    swum.add_Dict(newdictname, None, swum.USER_CREATED, newdictfname)
                else:
                    try:
                        newdict = json.loads(newdictvalues)
                        swum.add_Dict(newdictname, newdict, swum.USER_CREATED, None)
                    except Exception as e:
                        opstat.store_exception("user dict is invalid ", e)
            else:
                opstat.set_status(False)
                opstat.set_errorMsg("No dict values defined")
        else:
            opstat.set_status(False)
            opstat.set_errorMsg("No Dict Name Specified")
        if opstat.get_status():
            swuw.display_list_dict(swum.DICT_ID)
        else:
            display_exception(opstat)
        return

    if optionId == swum.LOAD_LIST_OPTION:
        swuw.get_sw_utilities_main_taskbar()
        fparms = get_parms_for_input(parms, swuw.maint_list_utility_input_idList)
        swuw.display_list_maint(None, fparms[3])
        return

    if optionId == swum.DELETE_LIST_OPTION:
        swuw.get_sw_utilities_main_taskbar()
        fparms = get_parms_for_input(parms, swuw.maint_list_utility_input_idList)
        listname = fparms[0]
        opstat = opStatus()
        # NOTE(review): reserved names are checked against swum.ReservedDicts
        # even for lists - confirm there is no separate ReservedLists table
        for reserved in swum.ReservedDicts:
            if listname == reserved:
                opstat.set_status(False)
                opstat.set_errorMsg("List to delete is a system reserved dict")
        if opstat.get_status():
            if len(listname) == 0:
                opstat.set_status(False)
                opstat.set_errorMsg("Invalid list name to delete")
            else:
                swum.delete_List(listname)
                swuw.display_list_maint()
        if not opstat.get_status():
            display_exception(opstat)
        return

    if optionId == swum.LOAD_DICT_OPTION:
        swuw.get_sw_utilities_main_taskbar()
        fparms = get_parms_for_input(parms, swuw.maint_dict_utility_input_idList)
        swuw.display_dict_maint(None, fparms[3])
        return

    if optionId == swum.DELETE_DICT_OPTION:
        swuw.get_sw_utilities_main_taskbar()
        fparms = get_parms_for_input(parms, swuw.maint_dict_utility_input_idList)
        dictname = fparms[0]
        opstat = opStatus()
        for reserved in swum.ReservedDicts:
            if dictname == reserved:
                opstat.set_status(False)
                opstat.set_errorMsg("Dict to delete is a system reserved dict")
        if opstat.get_status():
            if len(dictname) == 0:
                opstat.set_status(False)
                opstat.set_errorMsg("Invalid dict name to delete")
            else:
                swum.delete_Dict(dictname)
                swuw.display_dict_maint()
        if not opstat.get_status():
            display_exception(opstat)
        return
def process_sort_by_column(parms, display=True):
    """
    * --------------------------------------------------------------------------
    * function : sort by column transform option
    *
    * parms :
    *   parms   - associated parms
    *   display - display results flag
    *
    * returns :
    *   N/A
    * --------------------------------------------------------
    """
    opstat = opStatus()
    fparms = get_parms_for_input(parms, dftw.sort_column_input_idList)

    # decode the form fields
    colname = fparms[0]
    sortorder = (fparms[1] == "True")
    sortkind = fparms[2].strip("'")
    naposition = fparms[3].strip("'")
    resetrowids = (fparms[4] == "True")

    if opstat.get_status():
        try:
            df = cfg.get_current_chapter_df(cfg.DataTransform_ID)
            df.sort_values(colname, axis=0, ascending=sortorder, inplace=True,
                           kind=sortkind, na_position=naposition)

            if resetrowids:
                # NOTE(review): reset_df_index is called with no args here -
                # confirm the dataframe_control variant accepts an empty call
                from dfcleanser.data_transform.data_transform_dataframe_control import reset_df_index
                opstat = reset_df_index()

            if display:
                # make scriptable
                add_to_script(["# sort by column ",
                               "from dfcleanser.data_transform.data_transform_columns_control import process_sort_by_column",
                               "process_sort_by_column(" + json.dumps(parms) + ",False)"], opstat)
                opstat.set_errorMsg("df sorted by column '" + colname + "' successfully.")

        except Exception as e:
            opstat.store_exception("Sort df By Column Error : " + colname, e)

    cfg.drop_config_value(dftw.sort_column_input_id + "Parms")
    return opstat
def drop_duplicate_rows(parms, display=True):
    """
    * --------------------------------------------------------------------------
    * function : drop df duplicate rows
    *
    * parms :
    *   parms   - transform parms
    *   display - display flag
    *
    * returns :
    *   opStatus for the operation
    * --------------------------------------------------------
    """
    opstat = opStatus()
    fparms = get_parms_for_input(parms, dftw.df_drop_dups_transform_input_idList)

    colnames = fparms[0]
    if len(colnames) == 0:
        colnames = None

    drop = (fparms[2] == "Drop")

    keep = fparms[3]
    if keep == "False":
        keep = False

    df = cfg.get_current_chapter_df(cfg.DataTransform_ID)

    if colnames is not None and not drop:
        # "Keep" semantics: dedupe on every column NOT named in the input
        colslist = df.columns.tolist()
        colnames = [col for col in colslist if col not in colnames]

    if opstat.get_status():
        try:
            df.drop_duplicates(colnames, keep=keep, inplace=True)

            if display:
                # make scriptable
                # fixed: generated import line was missing the 'import' keyword,
                # so the replayed script would raise a SyntaxError
                add_to_script(["# drop duplicate rows",
                               "from dfcleanser.data_transform.data_transform_dataframe_control import drop_duplicate_rows",
                               "drop_duplicate_rows(" + json.dumps(parms) + ",False)"], opstat)

        except Exception as e:
            # fixed: colnames may be None or a list - str() avoids a TypeError
            # inside the exception handler masking the real error
            opstat.store_exception("Unable to drop duplicate rows : " + str(colnames), e)

    return opstat
def set_df_index(parms, display=True):
    """
    * --------------------------------------------------------------------------
    * function : set df indices
    *
    * parms :
    *   parms   - transform parms
    *   display - display flag
    *
    * returns :
    *   opStatus for the operation
    * --------------------------------------------------------
    """
    opstat = opStatus()
    fparms = get_parms_for_input(parms, dftw.df_set_index_transform_input_idList)

    colnames = fparms[0]
    if len(colnames) == 0:
        opstat.set_status(False)
        opstat.set_errorMsg("column names list is empty")
    else:
        colnames = colnames.lstrip("[").rstrip("]").split(",")
        drop = (fparms[2] == "True")
        verify = (fparms[3] == "True")

    if opstat.get_status():
        try:
            df = cfg.get_current_chapter_df(cfg.DataTransform_ID)
            # NOTE(review): append=True keeps the existing index and appends the
            # new levels - confirm this is intended for a "set" operation
            # (append_to_df_index below also passes append=True)
            df.set_index(colnames, drop=drop, append=True, inplace=True,
                         verify_integrity=verify)
            cfg.set_dfc_dataframe_df(cfg.get_config_value(cfg.CURRENT_TRANSFORM_DF), df)

            if display:
                # make scriptable
                # fixed: generated import line was missing the 'import' keyword
                # NOTE(review): parms[1] is dumped here while sibling transforms
                # dump the whole parms - confirm replay expects the sub-parms
                add_to_script(["# set df index",
                               "from dfcleanser.data_transform.data_transform_dataframe_control import set_df_index",
                               "set_df_index(" + json.dumps(parms[1]) + ",False)"], opstat)

        except Exception as e:
            opstat.store_exception("Unable to set index of column(s) : " + str(colnames), e)

    return opstat
def append_to_df_index(parms, display=True):
    """
    * --------------------------------------------------------------------------
    * function : append column to df indices
    *
    * parms :
    *   parms   - transform parms
    *   display - display flag
    *
    * returns :
    *   opStatus for the operation
    * --------------------------------------------------------
    """
    opstat = opStatus()
    fparms = get_parms_for_input(parms, dftw.df_append_index_transform_input_idList)

    raw_colnames = fparms[0]
    if len(raw_colnames) == 0:
        # fixed: the empty check previously ran AFTER split(","), which can
        # never return an empty list ("".split(",") == [""]), so the error
        # branch was unreachable
        opstat.set_status(False)
        opstat.set_errorMsg("column names list is empty")
    else:
        colnames = raw_colnames.lstrip("[").rstrip("]").split(",")
        df = cfg.get_current_chapter_df(cfg.DataTransform_ID)
        drop = (fparms[2] == "True")
        verify = (fparms[3] == "True")

        try:
            df.set_index(keys=colnames, drop=drop, append=True, inplace=True,
                         verify_integrity=verify)

            if display:
                # make scriptable
                # fixed: generated import line was missing the 'import' keyword
                add_to_script(["# append to df index",
                               "from dfcleanser.data_transform.data_transform_dataframe_control import append_to_df_index",
                               "append_to_df_index(" + json.dumps(parms[1]) + ",False)"], opstat)

        except Exception as e:
            # fixed: colnames is a list - str() avoids a TypeError inside the
            # exception handler
            opstat.store_exception("Unable to append to df index : " + str(colnames), e)

    return opstat
def add_column_names_row(parms, display=True):
    """
    * --------------------------------------------------------------------------
    * function : add a column names row
    *
    * parms :
    *   parms   - transform parms
    *   display - display flag
    *
    * returns :
    *   opStatus for the operation
    * --------------------------------------------------------
    """
    opstat = opStatus()

    try:
        fparms = get_parms_for_input(parms, dftw.df_add_row_transform_input_idList)

        filename = fparms[0]
        collist = fparms[1]

        if len(filename) == 0:
            filename = "None"
        if len(collist) == 0:
            collist = "None"
        else:
            collist = collist.replace("'", "").split(",")

        if (filename != "None") or (collist != "None"):

            if filename != "None":
                try:
                    # 'with' closes the file; the old explicit close() inside
                    # the with-block was redundant
                    with open(filename, 'r') as colid_file:
                        colids = json.load(colid_file)
                except Exception as e:
                    opstat.store_exception("Unable to open column names file" + filename, e)
            else:
                colids = collist

            # fixed: only assign the columns when the names were obtained -
            # previously a failed file open fell through to a NameError here
            if opstat.get_status():
                # NOTE(review): sibling transforms pass cfg.DataTransform_ID to
                # get_current_chapter_df - confirm CURRENT_TRANSFORM_DF is the
                # correct key here
                cfg.get_current_chapter_df(cfg.CURRENT_TRANSFORM_DF).columns = colids

                if display:
                    # make scriptable
                    # fixed: generated import line was missing 'import'
                    add_to_script(["# Add Column Names Row",
                                   "from dfcleanser.data_transform.data_transform_dataframe_control import add_column_names_row",
                                   "add_column_names_row(" + single_quote(filename) + "," + json.dumps(collist) + ",False)"], opstat)

        else:
            opstat.set_status(False)
            opstat.set_errorMsg("No Column List or filename defined")

    except Exception as e:
        opstat.store_exception("Unable to add column names", e)

    return opstat
def reset_df_index(parms, display=True):
    """
    * --------------------------------------------------------------------------
    * function : reset df indices
    *
    * parms :
    *   parms   - transform parms
    *   display - display flag
    *
    * returns :
    *   opStatus for the operation
    * --------------------------------------------------------
    """
    opstat = opStatus()

    df = cfg.get_current_chapter_df(cfg.DataTransform_ID)
    fparms = get_parms_for_input(parms, dftw.df_reset_index_transform_input_idList)

    drop_levels = fparms[0]
    if len(drop_levels) > 0:
        drop_levels = drop_levels.lstrip("[").rstrip("]").split(",")
        if drop_levels[0] == "All":
            # "All" expands to every NAMED index level (unnamed levels skipped)
            drop_levels = []
            for level_name in df.index.names:
                if level_name is not None:
                    drop_levels.append(level_name)
    else:
        drop_levels = None

    # NOTE(review): the form value 'True' maps to drop=False (keep the old
    # index as a column) - confirm the inversion matches the form label
    drop = not (fparms[2] == "True")

    if opstat.get_status():
        try:
            df.reset_index(level=drop_levels, drop=drop, inplace=True)

            if display:
                # make scriptable
                # fixed: generated import line was missing the 'import' keyword
                add_to_script(["# reset df index",
                               "from dfcleanser.data_transform.data_transform_dataframe_control import reset_df_index",
                               "reset_df_index(" + json.dumps(parms[1]) + ",False)"], opstat)

        except Exception as e:
            opstat.store_exception("Unable to reset df index : ", e)

    return opstat
def display_dfsubset_utility(optionId, parms=None):
    """
    * ---------------------------------------------------------
    * function : main subset utility control
    *
    * parms :
    *   optionId - function to run
    *   parms    - parms to run function
    *
    * returns :
    *   NA
    * --------------------------------------------------------
    """
    if cfg.is_a_dfc_dataframe_loaded():

        from IPython.display import clear_output
        clear_output()

        from dfcleanser.common.html_widgets import define_inputs, are_owner_inputs_defined
        if not are_owner_inputs_defined(cfg.SWDFSubsetUtility_ID):
            define_inputs(cfg.SWDFSubsetUtility_ID, swsw.SWUtility_subset_inputs)

        if optionId == swsm.DISPLAY_MAIN:
            swsw.get_dfsubset_main_taskbar()
            clear_sw_utility_dfsubsetdata()
            cfg.display_data_select_df(cfg.SWDFSubsetUtility_ID)
            swsm.clear_current_subset_data()

        elif optionId == swsm.DISPLAY_GET_SUBSET:
            swsm.clear_current_subset_data()
            if DEBUG_SUBSET:
                swsm.set_current_subset_sequence(swsm.dfc_subset_sequence())
                print("\ncurrent_subset_sequence\n")
                print("input_df_title", swsm.get_current_subset_sequence().get_input_df_title())
                print("get_sequence_title", swsm.get_current_subset_sequence().get_sequence_title())
                print("get_sequence_steps", swsm.get_current_subset_sequence().get_sequence_steps())
                if swsm.get_current_subset_sequence().get_sequence_steps() is not None:
                    print("get_total_sequence_steps", swsm.get_current_subset_sequence().get_total_sequence_steps())
                print("get_output_csv", swsm.get_current_subset_sequence().get_output_csv())
                print("get_output_dfc_df_title", swsm.get_current_subset_sequence().get_output_dfc_df_title())
            swsw.display_df_subset_setup()
            if DEBUG_SUBSET:
                print("DISPLAY_GET_SUBSET", parms)
                print("DISPLAY_GET_SUBSET : clear data")
                print(swsm.get_current_subset_sequence())
                print(swsm.get_current_subset_df())
                print(swsm.get_current_subset_step())
                print("new_sequence")
                swsm.dump_current_step()
                swsm.dump_current_sequence()

        elif optionId == swsm.PROCESS_GET_SUBSET:
            # fold the pending step (if any) into the sequence before starting
            current_step = swsm.get_current_subset_step()
            if current_step is not None:
                current_sequence = swsm.get_current_subset_sequence()
                current_sequence.add_step_to_sequence_steps(current_step)
            fparms = get_parms_for_input(parms, swsw.get_subset_input_idList)
            if len(fparms) > 0:
                df_title = fparms[0]
                df = cfg.get_dfc_dataframe_df(df_title)
                col_names = fparms[1]
                col_action = fparms[3]
                new_subset_df = drop_add_cols(col_names, col_action, df)
                new_subset_df_title = df_title
                new_subset_step = swsm.dfc_subset_step(new_subset_df_title, col_names, col_action)
                swsm.set_current_subset_step(new_subset_step)
                swsm.set_current_subset_df(new_subset_df)
                swsw.display_df_criteria(new_subset_df_title, new_subset_df)
            if DEBUG_SUBSET:
                print("\nPROCESS_GET_SUBSET\n ", parms, "\n ", fparms)
                swsm.dump_current_step()
                swsm.dump_current_sequence()

        elif optionId == swsm.DISPLAY_SAVED_SUBSET:
            swsw.display_saved_subset_sequences()
            if DEBUG_SUBSET:
                print("\nDISPLAY_SAVED_SUBSET", parms)

        elif optionId == swsm.PROCESS_RUN_CRITERIA:
            opstat = opStatus()
            fparms = get_parms_for_input(parms, swsw.get_subset_criteria_input_idList)
            subset_title = fparms[0]
            if len(subset_title) == 0:
                # default title : <input df title>_subset_<next step number>
                current_sequence = swsm.get_current_subset_sequence()
                total_steps = current_sequence.get_total_sequence_steps()
                current_step = swsm.get_current_subset_step()
                subset_title = (current_step.get_input_subset_df_title() +
                                "_subset_" + str(total_steps + 1))
            criteria = fparms[2]
            if len(criteria) > 0:
                try:
                    clock = RunningClock()
                    clock.start()
                    final_criteria = (swsm.starting_criteria_preamble + criteria +
                                      swsm.starting_criteria_postamble)
                    # SECURITY NOTE: executes user-supplied criteria text from
                    # the form - acceptable only in this trusted-notebook context
                    exec(final_criteria)
                    current_step = swsm.get_current_subset_step()
                    current_step.set_criteria(criteria)
                    current_step.set_output_subset_df_title(subset_title)
                    clock.stop()
                except Exception as e:
                    opstat.store_exception("Error running df_criteria " + criteria, e)
                    clock.stop()
                if opstat.get_status():
                    swsw.display_process_subset()
                else:
                    display_exception(opstat)
            if DEBUG_SUBSET:
                print("PROCESS_RUN_CRITERIA : End")
                swsm.dump_current_step()
                swsm.dump_current_sequence()

        elif optionId == swsm.DISPLAY_SAVE_SUBSET:
            fparms = get_parms_for_input(parms, swsw.get_subset_run_input_idList)
            current_sequence = swsm.get_current_subset_sequence()
            current_step = swsm.get_current_subset_step()
            current_sequence.add_step_to_sequence_steps(current_step)
            if len(fparms) > 0:
                df_title = fparms[0]
                df = swsm.get_current_subset_df()
                col_names = fparms[1]
                col_action = fparms[3]
                new_subset_df = drop_add_cols(col_names, col_action, df)
                swsw.display_save_subset(df_title, new_subset_df)
            if DEBUG_SUBSET:
                print("DISPLAY_SAVE_SUBSET", parms, fparms)
                swsm.dump_current_step()
                swsm.dump_current_sequence()

        elif optionId == swsm.DISPLAY_SAVE_AND_GET_SUBSET:
            fparms = get_parms_for_input(parms, swsw.get_subset_run_input_idList)
            current_sequence = swsm.get_current_subset_sequence()
            current_step = swsm.get_current_subset_step()
            current_sequence.add_step_to_sequence_steps(current_step)
            if len(fparms) > 0:
                df_title = fparms[0]
                col_names = fparms[1]
                col_action = fparms[3]
                new_subset_step = swsm.dfc_subset_step(df_title, col_names, col_action)
                df = swsm.get_current_subset_df()
                new_subset_df = drop_add_cols(col_names, col_action, df)
                swsm.set_current_subset_df(new_subset_df)
                swsm.set_current_subset_step(new_subset_step)
                swsw.display_df_criteria(df_title, new_subset_df)
            if DEBUG_SUBSET:
                print("PROCESS_SAVE_AND_GET_SUBSET", parms, fparms)
                swsm.dump_current_step()
                swsm.dump_current_sequence()

        elif optionId == swsm.PROCESS_SAVE_SUBSET:
            save_subset_run(parms, 0)

        elif optionId == swsm.PROCESS_SUBSET_SEQUENCE:
            opstat = opStatus()
            fparms = get_parms_for_input(parms, swsw.get_subset_sequences_input_idList)
            sequence = fparms[0]
            run_option = fparms[1]
            saved_sequence = swsm.get_subset_sequence(sequence)
            first_step = saved_sequence.get_sequence_step(0)
            df_title = first_step.get_input_subset_df_title()
            df = cfg.get_dfc_dataframe_df(df_title)
            if df is None:
                swsw.get_dfsubset_main_taskbar()
                cfg.display_data_select_df(cfg.SWDFSubsetUtility_ID)
                opstat.set_status(False)
                opstat.set_errorMsg("subset sequence starting df '" + df_title +
                                    "' is not currently loaded in dfc")
                display_exception(opstat)
            else:
                if run_option == "Auto Run":
                    total_steps = saved_sequence.get_total_sequence_steps()
                    swsm.set_current_subset_df(df)
                    for i in range(total_steps):
                        current_step = saved_sequence.get_sequence_step(i)
                        current_columns = current_step.get_col_names_list()
                        columns_action = current_step.get_keep_drop_flag()
                        criteria = current_step.get_criteria()
                        output_df_title = current_step.get_output_subset_df_title()
                        current_df = swsm.get_current_subset_df()
                        if len(current_columns) > 0:
                            # fixed: the inner loop previously reused the outer
                            # index variable 'i'
                            drop_columns = []
                            for colname in list(current_df.columns):
                                if columns_action == "Keep":
                                    if colname not in current_columns:
                                        drop_columns.append(colname)
                                else:
                                    if colname in current_columns:
                                        drop_columns.append(colname)
                            if len(drop_columns) > 0:
                                try:
                                    current_df.drop(drop_columns, axis=1, inplace=True)
                                except Exception:
                                    opstat.set_status(False)
                                    opstat.set_errorMsg("Unable to drop columns from subset dataframe")
                            swsm.set_current_subset_df(current_df)
                        try:
                            current_df = swsm.get_current_subset_df()
                            # SECURITY NOTE: executes stored criteria text
                            exec(criteria + swsm.starting_criteria_postamble)
                            current_df = swsm.get_current_subset_df()
                        except Exception as e:
                            opstat.store_exception("Error running subset sequence '" + sequence + "'", e)
                    swsw.display_save_subset(output_df_title, swsm.get_current_subset_df(), True)
                else:
                    # manual run - set up the first step for stepping
                    total_steps = saved_sequence.get_total_sequence_steps()
                    swsm.set_current_subset_df(df)
                    current_step = saved_sequence.get_sequence_step(0)
                    current_df_title = current_step.get_input_subset_df_title()
                    current_columns = current_step.get_col_names_list()
                    columns_action = current_step.get_keep_drop_flag()
                    swsw.display_manual_df_subset_setup(saved_sequence.get_sequence_title(),
                                                        current_df_title, current_columns,
                                                        columns_action, 0)
                    swsm.set_current_subset_step_id(0)
                    swsm.set_current_subset_sequence(saved_sequence)

        elif optionId == swsm.PROCESS_GET_NEXT_SUBSET:
            # fixed: opstat was referenced below without ever being defined
            opstat = opStatus()
            fparms = get_parms_for_input(parms, swsw.get_manual_input_idList)
            collist = fparms[1].lstrip("[").rstrip("]").split(",")
            keep_drop = fparms[3]
            saved_sequence = swsm.get_current_subset_sequence()
            total_steps = saved_sequence.get_total_sequence_steps()
            current_step_id = swsm.get_current_subset_step_id()
            if current_step_id < total_steps:
                current_step = saved_sequence.get_sequence_step(swsm.get_current_subset_step_id())
            else:
                swsm.set_current_subset_step_col_names_list(collist)
                swsm.set_current_subset_keep_drop_flag(keep_drop)
                current_step = swsm.get_current_subset_step()
                swsm.dump_current_step()
            swsm.set_current_subset_step(current_step)
            current_df_title = current_step.get_input_subset_df_title()
            current_df = swsm.get_current_subset_df()
            current_columns = current_step.get_col_names_list()
            columns_action = current_step.get_keep_drop_flag()
            criteria = current_step.get_criteria()
            output_df_title = current_step.get_output_subset_df_title()
            if len(current_columns) > 0:
                drop_columns = []
                for colname in list(current_df.columns):
                    if columns_action == "Keep":
                        if colname not in current_columns:
                            drop_columns.append(colname)
                    else:
                        if colname in current_columns:
                            drop_columns.append(colname)
                if len(drop_columns) > 0:
                    try:
                        current_df.drop(drop_columns, axis=1, inplace=True)
                    except Exception:
                        opstat.set_status(False)
                        opstat.set_errorMsg("Unable to drop columns from subset dataframe")
            swsw.display_next_criteria(current_df_title, current_df, criteria, output_df_title)

        elif optionId == swsm.PROCESS_NEXT_CRITERIA:
            opstat = opStatus()
            fparms = get_parms_for_input(parms, swsw.get_next_criteria_input_idList)
            output_df_title = fparms[0]
            criteria = fparms[2]
            # fixed: referenced the undefined name 'sequence' here - the
            # current sequence is the one being stepped
            current_sequence = swsm.get_current_subset_sequence()
            sequence_title = current_sequence.get_sequence_title()
            try:
                current_df = swsm.get_current_subset_df()
                # SECURITY NOTE: executes stored criteria text
                exec(criteria + swsm.starting_criteria_postamble)
                current_df = swsm.get_current_subset_df()
            except Exception as e:
                opstat.store_exception("Error running subset sequence '" + sequence_title + "'", e)
                # fixed: 'current_step' was referenced here without being
                # defined - re-display the current step's criteria form
                current_step = swsm.get_current_subset_step()
                current_df_title = current_step.get_input_subset_df_title()
                current_df = swsm.get_current_subset_df()
                criteria = current_step.get_criteria()
                output_df_title = current_step.get_output_subset_df_title()
                swsw.display_next_criteria(current_df_title, current_df, criteria, output_df_title)
                display_exception(opstat)
            if opstat.get_status():
                swsm.set_current_subset_df(current_df)
                swsm.set_current_subset_step_id(swsm.get_current_subset_step_id() + 1)
                if swsm.get_current_subset_step_id() >= swsm.get_current_subset_sequence().get_total_sequence_steps():
                    swsw.display_sequence_save_subset(output_df_title, swsm.get_current_subset_df())
                else:
                    current_step = swsm.get_current_subset_sequence().get_sequence_step(swsm.get_current_subset_step_id())
                    current_df_title = current_step.get_input_subset_df_title()
                    current_columns = current_step.get_col_names_list()
                    columns_action = current_step.get_keep_drop_flag()
                    swsw.display_manual_df_subset_setup(
                        swsm.get_current_subset_sequence().get_sequence_title(),
                        current_df_title, current_columns, columns_action,
                        swsm.get_current_subset_step_id())

        elif optionId == swsm.DISPLAY_NEW_STEP:
            current_sequence = swsm.get_current_subset_sequence()
            sequence_title = current_sequence.get_sequence_title()
            current_step = swsm.get_current_subset_step()
            # the previous step's output df becomes the new step's input df
            df_title = current_step.get_output_subset_df_title()
            current_columns = []
            current_action = "Keep"
            criteria = swsm.starting_criteria
            output_df_title = ""
            current_step = swsm.dfc_subset_step(df_title, current_columns, current_action,
                                                criteria, output_df_title)
            swsm.set_current_subset_step(current_step)
            swsw.display_manual_df_subset_setup(sequence_title, df_title, current_columns,
                                                current_action, swsm.get_current_subset_step_id())

        elif optionId == swsm.PROCESS_SAVE_SAVED_SUBSET:
            save_subset_run(parms, 1)

        elif optionId == swsm.DISPLAY_GET_REMOTE_SUBSET:
            # pull the working df from another dfc chapter's config slot
            chapterid = parms[0]
            new_config_df = None
            if chapterid == cfg.DataInspection_ID:
                new_config_df = cfg.get_config_value(cfg.CURRENT_INSPECTION_DF)
            elif chapterid == cfg.DataCleansing_ID:
                new_config_df = cfg.get_config_value(cfg.CURRENT_CLEANSE_DF)
            elif chapterid == cfg.DataTransform_ID:
                new_config_df = cfg.get_config_value(cfg.CURRENT_TRANSFORM_DF)
            elif chapterid == cfg.DataExport_ID:
                new_config_df = cfg.get_config_value(cfg.CURRENT_EXPORT_DF)
            elif chapterid == cfg.DataImport_ID:
                new_config_df = cfg.get_config_value(cfg.CURRENT_IMPORT_DF)
            elif chapterid == cfg.SWGeocodeUtility_ID:
                new_config_df = cfg.get_config_value(cfg.CURRENT_GEOCODE_DF)
            elif chapterid == cfg.SWDFSubsetUtility_ID:
                new_config_df = cfg.get_config_value(cfg.CURRENT_SUBSET_DF)
            cfg.set_config_value(cfg.CURRENT_SUBSET_DF, new_config_df)
            swsm.clear_current_subset_data()
            swsw.display_df_subset_setup()

    else:
        # no dfc dataframe loaded - show the selection page
        swsw.get_dfsubset_main_taskbar()
        cfg.drop_config_value(cfg.CURRENT_SUBSET_DF)
        clear_sw_utility_dfsubsetdata()
        cfg.display_data_select_df(cfg.SWDFSubsetUtility_ID)
        if not (optionId == swsm.DISPLAY_MAIN):
            cfg.display_no_dfs(cfg.SWDFSubsetUtility_ID)
def save_subset_run(parms, saveid):
    """
    * --------------------------------------------------------------------------
    * function : save the current subset df and/or subset sequence
    *
    * parms :
    *  parms   - raw input form parms list
    *  saveid  - 0 : from the subset save form
    *            1 : from the saved-sequence save form (carries an extra
    *                "drop original sequence" flag)
    *
    * returns : N/A
    * --------------------------------------------------------
    """
    opstat = opStatus()

    # pull the typed form values for the appropriate save form
    if saveid == 0:
        fparms = get_parms_for_input(parms, swsw.get_subset_save_input_idList)
    else:
        fparms = get_parms_for_input(parms, swsw.get_saved_save_input_idList)

    dfc_df_title = fparms[0]
    csv_file_name = fparms[1]
    save_subset_name = fparms[2]

    # saved-sequence form carries an extra drop-original flag; form values
    # arrive as strings, so anything other than "False" means True
    if saveid == 1:
        drop_original = (fparms[3] != "False")
    else:
        drop_original = False

    # optionally register the subset df as a dfc dataframe
    if len(dfc_df_title) > 0:
        try:
            new_dfc_df = cfg.dfc_dataframe(dfc_df_title,
                                           swsm.get_current_subset_df(),
                                           "df from dataframe subset utility")
            cfg.add_dfc_dataframe(new_dfc_df)
        except Exception as e:
            opstat.store_exception("Error saving subset df as dfc df " + dfc_df_title, e)

    # optionally dump the subset df to a csv file
    if len(csv_file_name) > 0:
        try:
            swsm.get_current_subset_df().to_csv(csv_file_name, index=False)
        except Exception as e:
            opstat.store_exception("Error saving subset df as csv file " + csv_file_name, e)

    # optionally store the current sequence of subset steps under a new name
    if len(save_subset_name) > 0:
        try:
            swsm.add_subset_sequence(save_subset_name)
        except Exception as e:
            opstat.store_exception("Error saving subset sequence " + save_subset_name, e)

    # drop the original sequence after a save-under-new-name
    if drop_original:
        current_sequence = swsm.get_current_subset_sequence()
        sequence_title = current_sequence.get_sequence_title()
        swsm.drop_subset_sequence(sequence_title)

    # redisplay the main subset taskbar and the df selector
    swsw.get_dfsubset_main_taskbar()
    cfg.display_data_select_df(cfg.SWDFSubsetUtility_ID)
    print("\n")

    if opstat.get_status():
        # build an html summary of everything that was saved / dropped
        status_msg = ""
        if len(dfc_df_title) > 0:
            status_msg = status_msg + ("Final Subset df saved as dfc df : " + dfc_df_title + "<br><br>")
        if len(csv_file_name) > 0:
            # bug fix: the df is written with to_csv above, so report a csv
            # file (message previously said "excel file")
            status_msg = status_msg + ("Final Subset df saved as csv file : " + csv_file_name + "<br><br>")
        if len(save_subset_name) > 0:
            status_msg = status_msg + ("Subset sequence saved as : " + save_subset_name + "<br>")
        if drop_original:
            status_msg = status_msg + ("Original Subset sequence dropped : " + sequence_title + "<br>")
        if len(status_msg) > 0:
            display_status(status_msg)
    else:
        display_exception(opstat)

    swsm.clear_current_subset_data()

    if DEBUG_SUBSET:
        print("PROCESS_SAVE_SUBSET", parms, fparms)
        print("dfc_df_title", dfc_df_title)
        print("csv_file_name", csv_file_name)
        print("save_subset_name", save_subset_name)
        swsm.dump_current_step()
        swsm.dump_current_sequence()
def display_dc_pandas_export_sql_inputs(fId, dbId, dbconparms, exportparms=None):
    """
    * --------------------------------------------------------------------------
    * function : display pandas sql export form
    *
    * parms :
    *  fId           - export type (0 table list, 1 column list, 2 db connect)
    *  dbId          - database id
    *  dbconparms    - db connector parms
    *  exportparms   - export parms
    *
    * returns : N/A
    * --------------------------------------------------------
    """
    opstat = opStatus()
    opstatStatus = True
    listHtml = ""

    dbid = int(dbId)
    fid = int(fId)
    fparms = None

    if (fid == 0):
        # fid 0 : build the table-name list for the currently configured db
        dbid = cfg.get_config_value(cfg.CURRENT_DB_ID_KEY)
        from dfcleanser.data_import.data_import_widgets import (
            get_table_names, TABLE_NAMES, get_rows_html)
        tablelist = get_table_names(dbid, opstat)
        listHtml = get_rows_html(tablelist, TABLE_NAMES, True)

    elif (fid == 1):
        # fid 1 : a table was selected - build its column-name list
        fparms = get_parms_for_input(exportparms, pandas_export_sqltable_idList)
        dbid = cfg.get_config_value(cfg.CURRENT_DB_ID_KEY)
        # stash the raw form parms so they survive the form redisplay
        cfg.set_config_value(pandas_export_sqltable_id + "Parms", fparms)
        if (len(fparms[0]) > 0):
            from dfcleanser.data_import.data_import_widgets import (
                get_column_names, get_rows_html)
            from dfcleanser.data_import.data_import_model import COLUMN_NAMES
            # presumably fparms[1] is the selected table name - verify
            # against the input form id list
            columnlist = get_column_names(dbid, fparms[1], opstat)
            listHtml = get_rows_html(columnlist, COLUMN_NAMES, True)
        else:
            opstat.set_status(False)
            opstat.set_errorMsg("No Table Selected")

    elif (fid == 2):
        # fid 2 : select the db and render its connector parms list
        cfg.set_config_value(cfg.CURRENT_DB_ID_KEY, dbid)
        dbcondict = {}
        if (not (dbconparms == None)):
            parse_connector_parms(dbconparms, dbid, dbcondict)
        else:
            conparms = get_stored_con_Parms(dbid)
            parse_connector_parms(conparms, dbid, dbcondict)
        listHtml = get_db_connector_list(dbid, dbcondict)
        # if parsing the supplied parms failed, fall back to the
        # stored connector parms and rebuild the list
        if (not (opstat.get_status())):
            dbcondict = {}
            conparms = get_stored_con_Parms(dbid)
            parse_connector_parms(conparms, dbid, dbcondict)
            listHtml = get_db_connector_list(dbid, dbcondict)

    # remember any error state, then force status True so the export form
    # is still rendered; the saved error is redisplayed after the form
    opstatStatus = opstat.get_status()
    opstatErrormsg = opstat.get_errorMsg()
    opstat.set_status(True)

    if (opstat.get_status()):
        # NOTE(review): always true here (status forced True just above);
        # kept as-is to preserve behavior
        export_sql_input_form = InputForm(
            pandas_export_sqltable_id, pandas_export_sqltable_idList,
            pandas_export_sqltable_labelList, pandas_export_sqltable_typeList,
            pandas_export_sqltable_placeholderList, pandas_export_sqltable_jsList,
            pandas_export_sqltable_reqList)

        # select-field defaults : dfc dataframe list, if_exists mode, index flag
        selectDicts = []
        df_list = cfg.get_dfc_dataframes_select_list(cfg.DataExport_ID)
        selectDicts.append(df_list)
        exists = {"default": "fail", "list": ["fail", "replace", "append"]}
        selectDicts.append(exists)
        index = {"default": "True", "list": ["True", "False"]}
        selectDicts.append(index)
        get_select_defaults(export_sql_input_form, pandas_export_sqltable_id,
                            pandas_export_sqltable_idList,
                            pandas_export_sqltable_typeList, selectDicts)

        export_sql_input_form.set_shortForm(False)
        export_sql_input_form.set_gridwidth(680)
        export_sql_input_form.set_custombwidth(125)
        export_sql_input_form.set_fullparms(True)

        export_sql_input_html = ""
        export_sql_input_html = export_sql_input_form.get_html()

        export_sql_heading_html = "<div>" + get_pandas_export_input_title(
            dem.SQLTABLE_EXPORT, dbid) + "</div><br>"

        # re-stash the parsed form parms when export parms were supplied
        if (not (exportparms == None)):
            cfg.set_config_value(pandas_export_sqltable_id + "Parms", fparms)

        gridclasses = [
            "dfcleanser-common-grid-header", "dfc-left", "dfc-right"
        ]
        gridhtmls = [export_sql_heading_html, listHtml, export_sql_input_html]

        display_generic_grid("data-import-sql-table-wrapper", gridclasses,
                             gridhtmls)

    # surface any error captured before the form was rendered
    if (not (opstatStatus)):
        opstat.set_status(opstatStatus)
        opstat.set_errorMsg(opstatErrormsg)
        display_exception(opstat)
def get_sqltable_export_inputs(parms):
    """Parse the sql table export input form values from the raw parms list."""
    return get_parms_for_input(parms, pandas_export_sqltable_idList)
def get_html_export_inputs(parms):
    """Parse the html export input form values from the raw parms list."""
    return get_parms_for_input(parms, pandas_export_html_idList)