def upperCase_df_column(dftitle, dfcolname):
    """
    * ------------------------------------------------------------------------
    * function : convert string column to upper case
    *
    * parms :
    *  dftitle    - dataframe title
    *  dfcolname  - dataframe column to upper case
    *
    * returns :
    *  Successful : upper cased column values list
    *  Error      : opstat
    *
    * Notes :
    *  dfcleanser generic function
    * -------------------------------------------------------------------------
    """

    opstat = opStatus()
    df = cfg.get_dfc_dataframe_df(dftitle)

    try:
        # materialize the values here : in python 3 map() is lazy, so the
        # original code returned an un-evaluated iterator and any .upper()
        # failure escaped this try block and surfaced at the caller instead
        new_col_values = [str_value.upper() for str_value in df[dfcolname]]
        return (new_col_values)

    except Exception as e:
        opstat.store_exception(
            "'upperCase_df_column' error : " + dftitle + " " + dfcolname, e)

    return (opstat)
def normalize_df_column(dftitle, dfcolname):
    """
    * ------------------------------------------------------------------------
    * function : normalize a dataframe column (min-max scale)
    *
    * parms :
    *  dftitle    - dataframe title
    *  dfcolname  - dataframe column to normalize
    *
    * returns :
    *  Successful : normalized column values (1-D array)
    *  Error      : opstat
    *
    * Notes :
    *  dfcleanser generic function
    * -------------------------------------------------------------------------
    """

    opstat = opStatus()
    df = cfg.get_dfc_dataframe_df(dftitle)

    from sklearn.preprocessing import MinMaxScaler

    try:
        scaler = MinMaxScaler()
        # bug fix : fit_transform requires a 2-D input; df[dfcolname] is a
        # 1-D Series and always raised - select a one-column frame instead,
        # then flatten the (n, 1) result back to column-shaped values
        scaled_values = scaler.fit_transform(df[[dfcolname]]).ravel()
        return (scaled_values)

    except Exception as e:
        opstat.store_exception(
            "'normalize_df_column' error : " + dftitle + " " + dfcolname, e)

    return (opstat)
def absolute_df_column(dftitle, dfcolname):
    """
    * ------------------------------------------------------------------------
    * function : convert dataframe column to absolute values
    *
    * parms :
    *  dftitle    - dataframe title
    *  dfcolname  - dataframe column to take absolute values of
    *
    * returns :
    *  Successful : column of absolute values
    *  Error      : opstat
    *
    * Notes :
    *  dfcleanser generic function
    * -------------------------------------------------------------------------
    """

    opstat = opStatus()
    df = cfg.get_dfc_dataframe_df(dftitle)

    import numpy as np

    try:
        # bug fix : removed the 'np.array()' pre-initialization - calling
        # np.array with no argument raises TypeError outside the try block,
        # so the function crashed instead of returning opstat
        colabsolutes = np.absolute(df[dfcolname])
        return (colabsolutes)

    except Exception as e:
        opstat.store_exception(
            "'absolute_df_column' error : " + dftitle + " " + dfcolname, e)

    return (opstat)
def drop_working_df():
    """Remove the temporary subset working dataframe from dfc, if present."""

    from dfcleanser.common.cfg import get_dfc_dataframe_df, drop_dfc_dataframe

    working_title = "dfc_subset_working_df"

    # only attempt the drop when the working dataframe actually exists
    if get_dfc_dataframe_df(working_title) is not None:
        drop_dfc_dataframe(working_title)
def get_df_geocode_center(dftitle, dfcolname):
    """
    * ------------------------------------------------------------------------
    * function : get the center point of a dataframe locations column
    *
    * parms :
    *  dftitle    - dataframe name
    *  dfcolname  - list of 1 (combined lat/long) or 2 (lat, long)
    *               dataframe column names to use for locations
    *
    * returns :
    *  center point if no exception
    *  opStatus object if exception
    *
    * Notes :
    *  dfcleanser generic function
    * -------------------------------------------------------------------------
    """

    opstat = opStatus()

    import json
    geocoords = []

    df = cfg.get_dfc_dataframe_df(dftitle)

    # bug fix : original tested len(dfcolname == 1), which is len(False)
    # and raises TypeError for any list input
    if (len(dfcolname) == 1):
        geocoords = df[dfcolname[0]].tolist()
        if (type(geocoords[0]) == str):
            # NOTE(review): dumps turns the list into a single json string;
            # presumably get_geocode_center expects this form - confirm
            geocoords = json.dumps(geocoords)

    elif (len(dfcolname) == 2):
        # separate lat / long columns : zip them into [lat, long] pairs
        geolats = df[dfcolname[0]].tolist()
        if (type(geolats[0]) == str):
            geolats = json.dumps(geolats)

        geolongs = df[dfcolname[1]].tolist()
        if (type(geolongs[0]) == str):
            geolongs = json.dumps(geolongs)

        for i in range(len(geolats)):
            geocoords.append([geolats[i], geolongs[i]])

    else:
        opstat.set_status(False)
        opstat.set_errorMsg(
            "get_df_geocode_center Error : column names list is invalid")

    if (opstat.get_status()):
        return (get_geocode_center(geocoords, opstat))
    else:
        return (opstat)
def get_trig_values_for_column(dftitle, dfcolname, trigfunc):
    """
    * ------------------------------------------------------------------------
    * function : get trig column values
    *
    * parms :
    *  dftitle    - dataframe title
    *  dfcolname  - dataframe column to apply trig function to
    *  trigfunc   - trig function to apply
    *               ('sin','cos','tan','arcsin','arccos','arctan')
    *
    * returns :
    *  Successful : column of trig values (None for an unknown trigfunc)
    *  Error      : opstat
    *
    * Notes :
    *  dfcleanser generic function
    * -------------------------------------------------------------------------
    """

    opstat = opStatus()
    df = cfg.get_dfc_dataframe_df(dftitle)

    try:
        import numpy as np

        # bug fix : the old pre-init 'trigcol = np.array()' raised TypeError
        # on every call, so the function always returned opstat; a dispatch
        # table also replaces the long if/elif chain
        trig_dispatch = {'sin': np.sin, 'cos': np.cos, 'tan': np.tan,
                         'arcsin': np.arcsin, 'arccos': np.arccos,
                         'arctan': np.arctan}

        trig_op = trig_dispatch.get(trigfunc)
        if trig_op is None:
            # unknown trig function name : preserve the legacy None return
            return (None)

        return (trig_op(df[dfcolname]))

    except Exception as e:
        opstat.store_exception(
            "'get_trig_values_for_column' error : " + dftitle + " " +
            dfcolname + " " + trigfunc, e)

    return (opstat)
def random_float_range(dftitle, randomFloatLower, randomFloatUpper):
    """
    * ------------------------------------------------------------------------
    * function : generate column of random floats in a range
    *
    * parms :
    *  dftitle          - dataframe title
    *  randomFloatLower - random float lower range value
    *  randomFloatUpper - random float upper range value
    *
    * returns :
    *  Successful : list of random floats, one per dataframe row
    *  Error      : opstat
    *
    * Notes :
    *  dfcleanser generic function
    * -------------------------------------------------------------------------
    """

    opstat = opStatus()
    df = cfg.get_dfc_dataframe_df(dftitle)

    import random

    try:
        # bug fixes vs the original :
        #  - np.array() with no argument raised TypeError before the try
        #  - ndarrays have no append(); build a plain list instead
        #  - random.randrange() only accepts integers; random.uniform is
        #    the float-range equivalent
        lower = float(randomFloatLower)
        upper = float(randomFloatUpper)
        colrandfloats = [random.uniform(lower, upper) for _ in range(len(df))]
        return (colrandfloats)

    except Exception as e:
        opstat.store_exception(
            "'random_float_range' error : " + dftitle + " " +
            str(randomFloatLower) + " " + str(randomFloatUpper), e)

    return (opstat)
def round_df_column(dftitle, dfcolname, decimals):
    """
    * ------------------------------------------------------------------------
    * function : round float column to a decimals precision
    *
    * parms :
    *  dftitle    - dataframe title
    *  dfcolname  - dataframe column to round
    *  decimals   - rounding precision
    *               0 - round to int
    *
    * returns :
    *  Successful : rounded column values
    *  Error      : opstat
    *
    * Notes :
    *  dfcleanser generic function
    * -------------------------------------------------------------------------
    """

    opstat = opStatus()
    df = cfg.get_dfc_dataframe_df(dftitle)

    import numpy as np

    try:
        if (decimals == 0):
            dfrounds = np.rint(df[dfcolname])
        else:
            # bug fix : original called np.round_(df[dfcolname, decimals]),
            # indexing the frame with a tuple key (raises KeyError); the
            # precision must be the second argument of the rounding call.
            # np.round replaces np.round_, which was removed in numpy 2.0
            dfrounds = np.round(df[dfcolname], decimals)

        return (dfrounds)

    except Exception as e:
        opstat.store_exception(
            "'round_df_column' error : " + dftitle + " " + dfcolname + " " +
            str(decimals), e)

    return (opstat)
def convert_df_column_to_degrees_or_radians(dftitle, dfcolname, degrees):
    """
    * ------------------------------------------------------------------------
    * function : convert dataframe column to degrees or radians
    *
    * parms :
    *  dftitle    - dataframe title
    *  dfcolname  - dataframe column to convert
    *  degrees    - True  - convert radians to degrees
    *               False - convert degrees to radians
    *
    * returns :
    *  Successful : converted column values
    *  Error      : opstat
    *
    * Notes :
    *  dfcleanser generic function
    * -------------------------------------------------------------------------
    """

    opstat = opStatus()
    df = cfg.get_dfc_dataframe_df(dftitle)

    import numpy as np

    try:
        # bug fix : removed the 'np.array()' pre-initialization, which
        # raised TypeError outside the try block on every call
        if (degrees):
            colvalues = np.degrees(df[dfcolname])
        else:
            colvalues = np.radians(df[dfcolname])

        return (colvalues)

    except Exception as e:
        opstat.store_exception(
            "'convert_df_column_to_degrees_or_radians' error : " + dftitle +
            " " + dfcolname + " " + str(degrees), e)

    return (opstat)
def display_df_subset_setup():
    """
    * --------------------------------------------------------------------------
    * function : display the current 'get dataframe subset' input form,
    *            with the column stats table above it
    *
    * parms : N/A (reads the current subset df from the dfc config)
    *
    * returns : N/A
    * --------------------------------------------------------
    """

    from dfcleanser.common.html_widgets import InputForm

    # column statistics for the currently selected subset dataframe
    subset_df_title = cfg.get_config_value(cfg.CURRENT_SUBSET_DF)
    subset_df = cfg.get_dfc_dataframe_df(subset_df_title)
    stats_html = get_column_stats_table(subset_df_title, subset_df)

    input_form = InputForm(get_subset_input_id, get_subset_input_idList,
                           get_subset_input_labelList, get_subset_input_typeList,
                           get_subset_input_placeholderList, get_subset_input_jsList,
                           get_subset_input_reqList)

    # column select list gets a leading blank entry
    chapter_df = cfg.get_current_chapter_df(cfg.SWDFSubsetUtility_ID)
    column_choices = [" "] + chapter_df.columns.tolist()

    select_dicts = [
        cfg.get_dfc_dataframes_select_list(cfg.SWDFSubsetUtility_ID),
        {"default": column_choices[0],
         "list": column_choices,
         "callback": "change_subset_cols"},
        {"default": "Keep", "list": ["Keep", "Drop"]},
    ]

    get_select_defaults(input_form, get_subset_input_form[0],
                        get_subset_input_form[1], get_subset_input_form[3],
                        select_dicts)

    input_form.set_shortForm(False)
    input_form.set_gridwidth(680)
    input_form.set_custombwidth(140)
    input_form.set_fullparms(True)

    form_html = input_form.get_html()
    heading_html = "<div>Get Dataframe Subset</div><br></br>"

    grid_classes = ["dfc-top", "dfcleanser-common-grid-header", "dfc-bottom"]
    grid_htmls = [stats_html, heading_html, form_html]

    print("\n")
    display_generic_grid("sw-utils-subset-wrapper", grid_classes, grid_htmls)
def export_pandas_sqltable(sqltableparms, dbcondict, exportid, display=True):
    """
    * --------------------------------------------------------------------------
    * function : export a pandas dataframe into a sql table via sqlalchemy
    *
    * parms :
    *  sqltableparms - export parms list :
    *                  [0] dfc dataframe title, [1] target table name,
    *                  [2]-[7] to_sql keyword values,
    *                  [8] json string of additional to_sql keyword parms
    *  dbcondict     - db connector dict (None - rebuild from the stored
    *                  current db id)
    *  exportid      - export id (also used as config-key prefix for parms)
    *  display       - True - log a replayable call to the dfc script
    *
    * returns :
    *  (export_notes, opstat) - connector string (or "") and op status
    * --------------------------------------------------------
    """

    opstat = opStatus()

    import dfcleanser.common.db_utils as dbu
    dbcon = dbu.dbConnector()

    from dfcleanser.common.db_utils import grab_connection_parms

    # no connector dict supplied : rebuild one from the stored current db id
    if (dbcondict == None):
        parmslist = get_stored_con_Parms(
            cfg.get_config_value(cfg.CURRENT_DB_ID_KEY))
        dbcondict = set_dbcon_dict(cfg.get_config_value(cfg.CURRENT_DB_ID_KEY),
                                   parmslist)
        dbconparms = grab_connection_parms(dbcondict)
    else:
        dbconparms = grab_connection_parms(dbcondict)

    dbcon.set_ConnectionParms(dbconparms)
    # DataFrame.to_sql requires a sqlalchemy connection
    dbconnector = dbcon.connect_to_db(dbu.SQLALCHEMY, opstat)

    if (opstat.get_status()):

        # validate export parms before touching the database
        if (len(sqltableparms) == 0):
            opstat.set_status(False)
            opstat.set_errorMsg("No Export parameters defined")
        else:
            if (sqltableparms[0] == ""):
                opstat.set_status(False)
                opstat.set_errorMsg("No dataframe selcted to export")
            else:
                if (sqltableparms[1] == ""):
                    opstat.set_status(False)
                    opstat.set_errorMsg("No tabl;e selcted to export to")
                else:

                    df = cfg.get_dfc_dataframe_df(sqltableparms[0])
                    labellist = dew.pandas_export_sqltable_labelList

                    # collect the to_sql keyword names / values / types
                    try:
                        sqlkeys = [
                            labellist[2], labellist[3], labellist[4],
                            labellist[5], labellist[6], labellist[7]
                        ]
                        sqlvals = [
                            sqltableparms[2], sqltableparms[3],
                            sqltableparms[4], sqltableparms[5],
                            sqltableparms[6], sqltableparms[7]
                        ]
                        sqltypes = [
                            STRING_PARM, STRING_PARM, BOOLEAN_PARM,
                            STRING_PARM, INT_PARM, DICT_PARM
                        ]
                        sqlparms = {}
                        sqladdlparms = {}
                    except Exception as e:
                        opstat.set_status(False)
                        opstat.store_exception("Error parsing Export parms", e)

                    if (opstat.get_status()):
                        try:
                            sqlparms = get_function_parms(
                                sqlkeys, sqlvals, sqltypes)

                            # merge optional user-supplied json parms
                            if (not (sqltableparms[8] == "")):
                                sqladdlparms = json.loads(sqltableparms[8])
                                if (len(sqladdlparms) > 0):
                                    # NOTE(review): dict.keys() is not
                                    # indexable in python 3 - this indexing
                                    # likely needs list(...); confirm
                                    addlparmskeys = sqladdlparms.keys()
                                    for i in range(len(addlparmskeys)):
                                        sqlparms.update({
                                            addlparmskeys[i]:
                                            sqladdlparms.get(addlparmskeys[i])
                                        })
                        except Exception as e:
                            opstat.set_status(False)
                            opstat.store_exception(
                                "Error parsing Export additional parms", e)

                    if (opstat.get_status()):
                        try:
                            df.to_sql(sqltableparms[1], dbconnector,
                                      **sqlparms)
                        except Exception as e:
                            opstat.store_exception(
                                "Unable to export to sql table", e)

    export_notes = ""

    if (opstat.get_status()):

        if (display):
            #make scriptable
            # NOTE(review): "export export_pandas_sqltable" in the scripted
            # import line looks like it should read "import" - confirm
            add_to_script([
                "# Export SQL Table ",
                "from dfcleanser.data_export.data_export_control export export_pandas_sqltable",
                "export_pandas_sqltable(" + json.dumps(sqltableparms) + "," +
                json.dumps(dbcondict) + "," + str(exportid) + ",False)"
            ], opstat)

        export_notes = dbu.get_SQLAlchemy_connector_string(dbconparms)

        # remember the parms for re-display and the last exported df name
        if (len(sqltableparms) > 0):
            cfg.set_config_value(exportid + "Parms", sqltableparms)
            cfg.set_config_value(cfg.CURRENT_EXPORTED_FILE_NAME_KEY,
                                 sqltableparms[0], True)

    return (export_notes, opstat)
def export_pandas_html(fparms, exportId, labellist, display=True):
    """
    * --------------------------------------------------------------------------
    * function : pandas html export (DataFrame.to_html to a file)
    *
    * parms :
    *  fparms    - export parms list :
    *              [0] dfc dataframe title, [1] output file name,
    *              [2]-[5] to_html keyword values,
    *              [6] json string of additional to_html keyword parms
    *  exportId  - export id (also used as config-key prefix for parms)
    *  labellist - parm label list (keyword names for [2]-[5])
    *  display   - True - log a replayable call to the dfc script
    *
    * returns :
    *  opstat - operation status
    * --------------------------------------------------------
    """

    opstat = opStatus()

    if (len(fparms) == 0):
        opstat.set_status(False)
        opstat.set_errorMsg("No Export parameters defined")
    else:

        # collect the to_html keyword names / values / types
        try:
            htmlkeys = [labellist[2], labellist[3], labellist[4], labellist[5]]
            htmlvals = [fparms[2], fparms[3], fparms[4], fparms[5]]
            htmltypes = [INT_PARM, BOOLEAN_PARM, BOOLEAN_PARM, STRING_PARM]
            htmlparms = {}
            htmladdlparms = {}
        except Exception as e:
            opstat.store_exception("Error parsing import parms", e)

        if (opstat.get_status()):
            try:
                htmlparms = get_function_parms(htmlkeys, htmlvals, htmltypes)

                # merge optional user-supplied json parms
                if (not (fparms[6] == "")):
                    htmladdlparms = json.loads(fparms[6])
                    if (len(htmladdlparms) > 0):
                        # NOTE(review): dict.keys() is not indexable in
                        # python 3 - this indexing likely needs list(...);
                        # confirm
                        addlparmskeys = htmladdlparms.keys()
                        for i in range(len(addlparmskeys)):
                            htmlparms.update({
                                addlparmskeys[i]:
                                htmladdlparms.get(addlparmskeys[i])
                            })
            except Exception as e:
                opstat.store_exception("Unable to get additional parms", e)

        if (opstat.get_status()):

            if (fparms[0] == ""):
                opstat.set_status(False)
                opstat.set_errorMsg("No dataframe slected")
            else:

                df = cfg.get_dfc_dataframe_df(fparms[0])

                try:
                    if (len(htmlparms) > 0):
                        df.to_html(fparms[1], **htmlparms)
                    else:
                        df.to_html(fparms[1])
                except Exception as e:
                    opstat.store_exception(
                        "Unable to export html file" + fparms[0], e)

    if (opstat.get_status()):

        if (display):
            #make scriptable
            script = [
                "# Export HTML File ",
                "dfcleanser.data_export.data_export_control import export_pandas_html",
                "export_pandas_html(" + json.dumps(fparms) + "," +
                str(exportId) + "," + json.dumps(labellist) + ",False)"
            ]
            add_to_script(script, opstat)

        # remember the parms for re-display and the last exported df name
        if (len(fparms) > 0):
            cfg.set_config_value(exportId + "Parms", fparms)
            cfg.set_config_value(cfg.CURRENT_EXPORTED_FILE_NAME_KEY,
                                 fparms[0], True)

    return (opstat)
def display_dfsubset_utility(optionId, parms=None) :
    """
    * ---------------------------------------------------------
    * function : main subset utility control - dispatches the requested
    *            subset-wizard option and renders the matching form
    *
    * parms :
    *  optionId - function to run (swsm.DISPLAY_* / swsm.PROCESS_* constant)
    *  parms    - parms to run the function (option specific; typically the
    *             raw form input values)
    *
    * returns :
    *  NA
    * --------------------------------------------------------
    """

    if(cfg.is_a_dfc_dataframe_loaded()) :

        from IPython.display import clear_output
        clear_output()

        # make sure the subset-wizard input forms are registered once
        from dfcleanser.common.html_widgets import define_inputs, are_owner_inputs_defined
        if(not (are_owner_inputs_defined(cfg.SWDFSubsetUtility_ID)) ) :
            define_inputs(cfg.SWDFSubsetUtility_ID, swsw.SWUtility_subset_inputs)

        # ---- main taskbar + dataframe select ----
        if(optionId == swsm.DISPLAY_MAIN) :
            swsw.get_dfsubset_main_taskbar()
            clear_sw_utility_dfsubsetdata()
            cfg.display_data_select_df(cfg.SWDFSubsetUtility_ID)
            swsm.clear_current_subset_data()

        # ---- show the 'get subset' setup form ----
        elif(optionId == swsm.DISPLAY_GET_SUBSET) :
            swsm.clear_current_subset_data()

            if(DEBUG_SUBSET) :
                swsm.set_current_subset_sequence(swsm.dfc_subset_sequence())
                print("\ncurrent_subset_sequence\n")
                print("input_df_title", swsm.get_current_subset_sequence().get_input_df_title())
                print("get_sequence_title", swsm.get_current_subset_sequence().get_sequence_title())
                print("get_sequence_steps", swsm.get_current_subset_sequence().get_sequence_steps())
                if(not (swsm.get_current_subset_sequence().get_sequence_steps() is None)) :
                    print("get_total_sequence_steps", swsm.get_current_subset_sequence().get_total_sequence_steps())
                print("get_output_csv", swsm.get_current_subset_sequence().get_output_csv())
                print("get_output_dfc_df_title", swsm.get_current_subset_sequence().get_output_dfc_df_title())

            swsw.display_df_subset_setup()

            if(DEBUG_SUBSET) :
                print("DISPLAY_GET_SUBSET", parms)
                print("DISPLAY_GET_SUBSET : clear data")
                print(swsm.get_current_subset_sequence())
                print(swsm.get_current_subset_df())
                print(swsm.get_current_subset_step())
                print("new_sequence")
                swsm.dump_current_step()
                swsm.dump_current_sequence()

        # ---- apply the column keep/drop selection and show criteria form ----
        elif(optionId == swsm.PROCESS_GET_SUBSET) :

            # archive the in-progress step (if any) into the sequence
            current_step = swsm.get_current_subset_step()
            if(not (current_step is None)) :
                current_sequence = swsm.get_current_subset_sequence()
                current_sequence.add_step_to_sequence_steps(current_step)

            fparms = get_parms_for_input(parms, swsw.get_subset_input_idList)

            if(len(fparms) > 0) :
                df_title = fparms[0]
                df = cfg.get_dfc_dataframe_df(df_title)
                col_names = fparms[1]
                col_action = fparms[3]

                new_subset_df = drop_add_cols(col_names, col_action, df)
                new_subset_df_title = df_title

                new_subset_step = swsm.dfc_subset_step(new_subset_df_title, col_names, col_action)
                swsm.set_current_subset_step(new_subset_step)
                swsm.set_current_subset_df(new_subset_df)

                swsw.display_df_criteria(new_subset_df_title, new_subset_df)

            if(DEBUG_SUBSET) :
                print("\nPROCESS_GET_SUBSET\n ", parms, "\n ", fparms)
                swsm.dump_current_step()
                swsm.dump_current_sequence()

        # ---- list the saved subset sequences ----
        elif(optionId == swsm.DISPLAY_SAVED_SUBSET) :
            swsw.display_saved_subset_sequences()
            if(DEBUG_SUBSET) :
                print("\nDISPLAY_SAVED_SUBSET", parms)

        # ---- run the user-entered subset criteria expression ----
        elif(optionId == swsm.PROCESS_RUN_CRITERIA) :

            opstat = opStatus()

            fparms = get_parms_for_input(parms, swsw.get_subset_criteria_input_idList)

            subset_title = fparms[0]
            if(len(subset_title) == 0) :
                # no title given : synthesize one from the input df and step #
                current_sequence = swsm.get_current_subset_sequence()
                total_steps = current_sequence.get_total_sequence_steps()
                current_step = swsm.get_current_subset_step()
                subset_title = current_step.get_input_subset_df_title() + "_subset_" + str(total_steps+1)

            criteria = fparms[2]

            if(len(criteria) > 0) :
                try :
                    clock = RunningClock()
                    clock.start()

                    # the criteria is executed as python code wrapped in the
                    # module pre/post-amble; it operates on the current
                    # subset df held in swsm
                    # NOTE(review): exec of user-entered text - acceptable
                    # only because this is a local notebook tool
                    final_criteria = (swsm.starting_criteria_preamble +
                                      criteria +
                                      swsm.starting_criteria_postamble)
                    exec(final_criteria)

                    current_step = swsm.get_current_subset_step()
                    current_step.set_criteria(criteria)
                    current_step.set_output_subset_df_title(subset_title)

                    clock.stop()

                except Exception as e:
                    opstat.store_exception("Error running df_criteria " + criteria, e)
                    # NOTE(review): if RunningClock() itself raised, 'clock'
                    # is unbound here - confirm
                    clock.stop()

                if(opstat.get_status()) :
                    swsw.display_process_subset()
                else :
                    display_exception(opstat)

            if(DEBUG_SUBSET) :
                print("PROCESS_RUN_CRITERIA : End")
                swsm.dump_current_step()
                swsm.dump_current_sequence()

        # ---- show the 'save subset' form ----
        elif(optionId == swsm.DISPLAY_SAVE_SUBSET) :

            fparms = get_parms_for_input(parms, swsw.get_subset_run_input_idList)

            current_sequence = swsm.get_current_subset_sequence()
            current_step = swsm.get_current_subset_step()
            current_sequence.add_step_to_sequence_steps(current_step)

            if(len(fparms) > 0) :
                df_title = fparms[0]
                df = swsm.get_current_subset_df()
                col_names = fparms[1]
                col_action = fparms[3]

                new_subset_df = drop_add_cols(col_names, col_action, df)
                swsw.display_save_subset(df_title, new_subset_df)

            if(DEBUG_SUBSET) :
                print("DISPLAY_SAVE_SUBSET", parms, fparms)
                swsm.dump_current_step()
                swsm.dump_current_sequence()

        # ---- save current step and immediately start the next subset ----
        elif(optionId == swsm.DISPLAY_SAVE_AND_GET_SUBSET) :

            fparms = get_parms_for_input(parms, swsw.get_subset_run_input_idList)

            current_sequence = swsm.get_current_subset_sequence()
            current_step = swsm.get_current_subset_step()
            current_sequence.add_step_to_sequence_steps(current_step)

            if(len(fparms) > 0) :
                df_title = fparms[0]
                col_names = fparms[1]
                col_action = fparms[3]

                new_subset_step = swsm.dfc_subset_step(df_title, col_names, col_action)

                df = swsm.get_current_subset_df()
                new_subset_df = drop_add_cols(col_names, col_action, df)

                swsm.set_current_subset_df(new_subset_df)
                swsm.set_current_subset_step(new_subset_step)

                swsw.display_df_criteria(df_title, new_subset_df)

            if(DEBUG_SUBSET) :
                print("PROCESS_SAVE_AND_GET_SUBSET", parms, fparms)
                swsm.dump_current_step()
                swsm.dump_current_sequence()

        # ---- persist the current subset run ----
        elif(optionId == swsm.PROCESS_SAVE_SUBSET) :
            save_subset_run(parms, 0)

        # ---- run a saved subset sequence (auto or manual stepping) ----
        elif(optionId == swsm.PROCESS_SUBSET_SEQUENCE) :

            opstat = opStatus()

            fparms = get_parms_for_input(parms, swsw.get_subset_sequences_input_idList)

            sequence = fparms[0]
            run_option = fparms[1]

            saved_sequence = swsm.get_subset_sequence(sequence)
            first_step = saved_sequence.get_sequence_step(0)
            df_title = first_step.get_input_subset_df_title()

            df = cfg.get_dfc_dataframe_df(df_title)

            if(df is None) :
                # starting df is not loaded : fall back to the main taskbar
                swsw.get_dfsubset_main_taskbar()
                cfg.display_data_select_df(cfg.SWDFSubsetUtility_ID)
                opstat.set_status(False)
                opstat.set_errorMsg("subset sequence starting df '" + df_title + "' is not currently loaded in dfc")
                display_exception(opstat)

            else :

                if(run_option == "Auto Run") :

                    # replay every step of the saved sequence without user
                    # interaction : drop/keep columns, then exec the criteria
                    total_steps = saved_sequence.get_total_sequence_steps()
                    swsm.set_current_subset_df(df)

                    for i in range(total_steps) :

                        current_step = saved_sequence.get_sequence_step(i)
                        current_columns = current_step.get_col_names_list()
                        columns_action = current_step.get_keep_drop_flag()
                        criteria = current_step.get_criteria()
                        output_df_title = current_step.get_output_subset_df_title()

                        current_df = swsm.get_current_subset_df()

                        if(len(current_columns) > 0) :

                            colnames = list(current_df.columns)
                            drop_columns = []

                            # NOTE(review): this inner loop reuses the outer
                            # loop variable 'i' - harmless today because 'i'
                            # is not read again after this point, but fragile
                            for i in range(len(colnames)) :
                                if(columns_action == "Keep") :
                                    if(not (colnames[i] in current_columns)) :
                                        drop_columns.append(colnames[i])
                                else :
                                    if(colnames[i] in current_columns) :
                                        drop_columns.append(colnames[i])

                            if(len(drop_columns) > 0 ) :
                                try :
                                    current_df.drop(drop_columns, axis=1, inplace=True)
                                except :
                                    opstat.set_status(False)
                                    opstat.set_errorMsg("Unable to drop columns from subset dataframe")

                            swsm.set_current_subset_df(current_df)

                        try :
                            current_df = swsm.get_current_subset_df()
                            exec(criteria + swsm.starting_criteria_postamble)
                            current_df = swsm.get_current_subset_df()
                        except Exception as e:
                            opstat.store_exception("Error running subset sequence '" + sequence + "'", e)

                    swsw.display_save_subset(output_df_title, swsm.get_current_subset_df(), True)

                else :

                    # manual stepping : prime the first step and show it
                    total_steps = saved_sequence.get_total_sequence_steps()
                    swsm.set_current_subset_df(df)

                    current_step = saved_sequence.get_sequence_step(0)
                    current_df_title = current_step.get_input_subset_df_title()
                    current_columns = current_step.get_col_names_list()
                    columns_action = current_step.get_keep_drop_flag()

                    swsw.display_manual_df_subset_setup(saved_sequence.get_sequence_title(), current_df_title, current_columns, columns_action, 0)

                    swsm.set_current_subset_step_id(0)
                    swsm.set_current_subset_sequence(saved_sequence)

        # ---- manual stepping : advance to the next step's criteria form ----
        elif(optionId == swsm.PROCESS_GET_NEXT_SUBSET) :

            fparms = get_parms_for_input(parms, swsw.get_manual_input_idList)

            # fparms[1] arrives as a "[a,b,...]" string : strip and split
            collist = fparms[1]
            collist = collist.lstrip("[")
            collist = collist.rstrip("]")
            collist = collist.split(",")

            keep_drop = fparms[3]

            saved_sequence = swsm.get_current_subset_sequence()
            total_steps = saved_sequence.get_total_sequence_steps()
            current_step_id = swsm.get_current_subset_step_id()

            if(current_step_id < total_steps) :
                current_step = saved_sequence.get_sequence_step(swsm.get_current_subset_step_id())
            else :
                # past the saved steps : update the live step from the form
                swsm.set_current_subset_step_col_names_list(collist)
                swsm.set_current_subset_keep_drop_flag(keep_drop)
                current_step = swsm.get_current_subset_step()
                swsm.dump_current_step()

            swsm.set_current_subset_step(current_step)

            current_df_title = current_step.get_input_subset_df_title()
            current_df = swsm.get_current_subset_df()
            current_columns = current_step.get_col_names_list()
            columns_action = current_step.get_keep_drop_flag()
            criteria = current_step.get_criteria()
            output_df_title = current_step.get_output_subset_df_title()

            if(len(current_columns) > 0) :

                colnames = list(current_df.columns)
                drop_columns = []

                for i in range(len(colnames)) :
                    if(columns_action == "Keep") :
                        if(not (colnames[i] in current_columns)) :
                            drop_columns.append(colnames[i])
                    else :
                        if(colnames[i] in current_columns) :
                            drop_columns.append(colnames[i])

                if(len(drop_columns) > 0 ) :
                    try :
                        current_df.drop(drop_columns, axis=1, inplace=True)
                    except :
                        # NOTE(review): 'opstat' is never created in this
                        # branch - a drop failure would raise NameError
                        # here; confirm and define opstat up front
                        opstat.set_status(False)
                        opstat.set_errorMsg("Unable to drop columns from subset dataframe")

            swsw.display_next_criteria(current_df_title, current_df, criteria, output_df_title)

        # ---- manual stepping : run this step's criteria, show the next ----
        elif(optionId == swsm.PROCESS_NEXT_CRITERIA) :

            opstat = opStatus()

            fparms = get_parms_for_input(parms, swsw.get_next_criteria_input_idList)

            output_df_title = fparms[0]
            criteria = fparms[2]

            # NOTE(review): 'sequence' is only assigned in the
            # PROCESS_SUBSET_SEQUENCE branch - referencing it here raises
            # NameError; likely the current sequence should be fetched via
            # swsm.get_current_subset_sequence() instead - confirm
            current_sequence = swsm.get_subset_sequence(sequence)
            sequence_title = current_sequence.get_sequence_title()

            try :
                current_df = swsm.get_current_subset_df()
                exec(criteria + swsm.starting_criteria_postamble)
                current_df = swsm.get_current_subset_df()
            except Exception as e:
                opstat.store_exception("Error running subset sequence '" + sequence_title + "'", e)
                # NOTE(review): 'current_step' is not defined in this branch
                # before the exception path - confirm
                current_df_title = current_step.get_input_subset_df_title()
                current_df = swsm.get_current_subset_df()
                criteria = current_step.get_criteria()
                output_df_title = current_step.get_output_subset_df_title()
                swsw.display_next_criteria(current_df_title, current_df, criteria, output_df_title)
                display_exception(opstat)

            if(opstat.get_status()) :

                swsm.set_current_subset_df(current_df)
                swsm.set_current_subset_step_id(swsm.get_current_subset_step_id() + 1)

                if(swsm.get_current_subset_step_id() >= swsm.get_current_subset_sequence().get_total_sequence_steps()) :
                    # last step done : offer to save the final subset
                    swsw.display_sequence_save_subset(output_df_title, swsm.get_current_subset_df())
                else :
                    # more steps remain : show the next step's setup form
                    current_step = swsm.get_current_subset_sequence().get_sequence_step(swsm.get_current_subset_step_id())
                    current_df_title = current_step.get_input_subset_df_title()
                    current_columns = current_step.get_col_names_list()
                    columns_action = current_step.get_keep_drop_flag()
                    swsw.display_manual_df_subset_setup(swsm.get_current_subset_sequence().get_sequence_title(), current_df_title, current_columns, columns_action, swsm.get_current_subset_step_id())

        # ---- start a brand new step from the previous step's output ----
        elif(optionId == swsm.DISPLAY_NEW_STEP) :

            current_sequence = swsm.get_current_subset_sequence()
            sequence_title = current_sequence.get_sequence_title()
            current_step = swsm.get_current_subset_step()
            df_title = current_step.get_output_subset_df_title()
            current_df = swsm.get_current_subset_df()

            # fresh step defaults : no column filter, keep-all, template criteria
            current_columns = []
            current_action = "Keep"
            criteria = swsm.starting_criteria
            output_df_title = ""

            current_step = swsm.dfc_subset_step(df_title, current_columns, current_action, criteria, output_df_title)
            swsm.set_current_subset_step(current_step)

            swsw.display_manual_df_subset_setup(sequence_title, df_title, current_columns, current_action, swsm.get_current_subset_step_id())
            #swsw.display_next_criteria(df_title,current_df,criteria,output_df_title)

        # ---- persist a run of an already-saved sequence ----
        elif(optionId == swsm.PROCESS_SAVE_SAVED_SUBSET ) :
            save_subset_run(parms, 1)

        # ---- entry from another dfc chapter : adopt that chapter's df ----
        elif(optionId == swsm.DISPLAY_GET_REMOTE_SUBSET) :

            chapterid = parms[0]
            new_config_df = None

            # map the calling chapter id to its current-dataframe config key
            if(chapterid == cfg.DataInspection_ID) :
                new_config_df = cfg.get_config_value(cfg.CURRENT_INSPECTION_DF)
            elif(chapterid == cfg.DataCleansing_ID) :
                new_config_df = cfg.get_config_value(cfg.CURRENT_CLEANSE_DF)
            elif(chapterid == cfg.DataTransform_ID) :
                new_config_df = cfg.get_config_value(cfg.CURRENT_TRANSFORM_DF)
            elif(chapterid == cfg.DataExport_ID) :
                new_config_df = cfg.get_config_value(cfg.CURRENT_EXPORT_DF)
            elif(chapterid == cfg.DataImport_ID) :
                new_config_df = cfg.get_config_value(cfg.CURRENT_IMPORT_DF)
            elif(chapterid == cfg.SWGeocodeUtility_ID) :
                new_config_df = cfg.get_config_value(cfg.CURRENT_GEOCODE_DF)
            elif(chapterid == cfg.SWDFSubsetUtility_ID) :
                new_config_df = cfg.get_config_value(cfg.CURRENT_SUBSET_DF)

            cfg.set_config_value(cfg.CURRENT_SUBSET_DF, new_config_df)

            swsm.clear_current_subset_data()
            swsw.display_df_subset_setup()

    else :

        # no dfc dataframe loaded : show the taskbar and the df selector
        swsw.get_dfsubset_main_taskbar()
        cfg.drop_config_value(cfg.CURRENT_SUBSET_DF)
        clear_sw_utility_dfsubsetdata()
        cfg.display_data_select_df(cfg.SWDFSubsetUtility_ID)

        if(not(optionId == swsm.DISPLAY_MAIN)) :
            cfg.display_no_dfs(cfg.SWDFSubsetUtility_ID)
def get_df_list_html(title):
    """
    * --------------------------------------------------------------------------
    * function : get the html for the dfc dataframe manager list form
    *
    * parms :
    *  title - dataframe title to preselect (None - use the first loaded df)
    *
    * returns : the rendered input form html
    * --------------------------------------------------------
    """

    def _df_summary_parms(df_title):
        # form fields : [title, row count, column count, notes]
        target_df = cfg.get_dfc_dataframe_df(df_title)
        return [df_title,
                str(len(target_df)),
                str(len(target_df.columns)),
                cfg.get_dfc_dataframe_notes(df_title)]

    if not cfg.is_a_dfc_dataframe_loaded():
        fparms = ["", "", "", ""]
    elif title is None:
        # default to the first loaded dataframe
        fparms = _df_summary_parms(cfg.get_dfc_dataframes_titles_list()[0])
    elif cfg.get_dfc_dataframe(title) is None:
        fparms = ["", "", "", ""]
    else:
        fparms = _df_summary_parms(title)

    parms_protect = [False, True, True, False]

    cfg.set_config_value(dfmgr_input_id + "Parms", fparms)
    cfg.set_config_value(dfmgr_input_id + "ParmsProtect", parms_protect)

    manager_form = InputForm(dfmgr_input_id, dfmgr_input_idList,
                             dfmgr_input_labelList, dfmgr_input_typeList,
                             dfmgr_input_placeholderList, dfmgr_input_jsList,
                             dfmgr_input_reqList)

    #if(df_titles is None) :
    if not cfg.is_a_dfc_dataframe_loaded():
        df_select = {"default": " ", "list": [" "]}
    else:
        df_select = {"default": str(fparms[0]),
                     "list": cfg.get_dfc_dataframes_titles_list(),
                     "callback": "select_new_df"}

    get_select_defaults(manager_form, dfmgr_input_id, dfmgr_input_idList,
                        dfmgr_input_typeList, [df_select])

    manager_form.set_shortForm(True)
    manager_form.set_gridwidth(480)
    manager_form.set_custombwidth(90)
    manager_form.set_fullparms(True)

    return manager_form.get_html()