def display_export_forms(exportid, detid=0, notes=False):
    """
    * --------------------------------------------------------------------------
    * function : display the data export forms
    *
    * parms :
    *   exportid -   export form id
    *   detid    -   export detail id passed through to the form display
    *   notes    -   notes flag passed through to the form display
    *
    * returns : N/A
    * --------------------------------------------------------
    """

    # without any loaded dataframe the remembered export df is stale
    df_is_loaded = cfg.is_a_dfc_dataframe_loaded()
    if not df_is_loaded:
        cfg.drop_config_value(cfg.CURRENT_EXPORT_DF)

    # the taskbar-only display starts from a clean export state
    if exportid == dem.EXPORT_TB_ONLY:
        clear_data_export_data()

    dew.display_dc_export_forms(exportid, detid, notes)
Example #2
0
def display_data_inspection(option, parms=None):
    """
    * --------------------------------------------------------------------------
    * function : main data inspection processing
    *
    * Clears the cell output, makes sure the inspection input forms are
    * defined, draws either the main inspection display or the taskbar and
    * then dispatches on option. When no dfc dataframe is loaded the current
    * inspection df config value is dropped and, for every option except the
    * main one, the "no dfs" notice is displayed. The pop up buffer is
    * displayed last in all cases.
    *
    * parms :
    *   option  -   function option (one of the dim.* option ids)
    *   parms   -   associated parms; most options index into it, so it
    *               must be supplied for any option that reads it
    *
    * returns :
    *  N/A
    * --------------------------------------------------------
    """

    from IPython.display import clear_output
    clear_output()

    opstat = opStatus()

    from dfcleanser.common.html_widgets import define_inputs, are_owner_inputs_defined
    if not are_owner_inputs_defined(cfg.DataInspection_ID):
        define_inputs(cfg.DataInspection_ID, diw.datainspection_inputs)

    if option == dim.MAIN_OPTION:
        drop_working_df()
        diw.display_dfc_inspection_main()
        clear_data_inspection_data()
    else:
        diw.display_inspection_main_taskbar()

    if cfg.is_a_dfc_dataframe_loaded():

        # these options carry the df selection form as parms[0]
        if option in (dim.DISPLAY_DATATYPES_OPTION,
                      dim.DISPLAY_NANS_OPTION,
                      dim.DISPLAY_ROWS_OPTION,
                      dim.DISPLAY_COLS_OPTION,
                      dim.DISPLAY_CATEGORIES_OPTION):

            fparms = get_parms_for_input(parms[0],
                                         diw.data_inspection_df_input_idList)

            if len(fparms) > 0:
                cfg.set_config_value(cfg.CURRENT_INSPECTION_DF, fparms[0])

            # the rows display is the only one that keeps the working df
            if not (option == dim.DISPLAY_ROWS_OPTION):
                drop_working_df()

        if ((option == dim.DISPLAY_DATATYPES_OPTION)
                or (option == dim.DISPLAY_FULL_COLUMN_NAMES)):
            df_data_info = dim.get_df_datatypes_data(
                cfg.get_current_chapter_df(cfg.DataInspection_ID))
            display_inspect_datatypes(option, df_data_info)

        elif option == dim.DISPLAY_NANS_OPTION:
            display_inspect_nans()

        elif option == dim.DISPLAY_ROWS_OPTION:
            display_inspect_rows()

        elif option == dim.DISPLAY_COLS_OPTION:
            if len(parms) > 1:
                display_inspect_cols(parms[1])
            else:
                display_inspect_cols(None)

        elif option == dim.DISPLAY_CATEGORIES_OPTION:
            display_inspect_categories()

        elif ((option == dim.DROP_ROW_NANS_OPTION)
              or (option == dim.DROP_COL_NANS_OPTION)):

            dropping_rows = (option == dim.DROP_ROW_NANS_OPTION)

            # only the Rows/Cols word differs between the two error messages
            target = "Rows" if dropping_rows else "Cols"

            thresholdType = parms[0]

            if dropping_rows:
                fparms = get_parms_for_input(parms[1],
                                             diw.drop_rows_input_idList)
            else:
                fparms = get_parms_for_input(parms[1],
                                             diw.drop_columns_input_idList)

            threshold = None

            if len(fparms) > 0:
                try:
                    threshold = int(fparms[0])
                except (ValueError, TypeError):
                    # str() keeps the message buildable for non-string input
                    opstat.set_status(False)
                    opstat.set_errorMsg("Drop Nan " + target +
                                        " Threshold value '" +
                                        str(fparms[0]) + "' is invalid")
            else:
                opstat.set_status(False)
                opstat.set_errorMsg("Drop Nan " + target +
                                    " Threshold value is not defined")

            if dropping_rows:

                # the drop itself may also flag opstat, so re-check after it
                if opstat.get_status():
                    dropstats = drop_nan_rows(
                        cfg.get_current_chapter_df(cfg.DataInspection_ID),
                        threshold, thresholdType, opstat)

                if not opstat.get_status():
                    display_exception(opstat)
                elif dropstats[0] > 0:
                    display_status(
                        str(dropstats[0]) +
                        " Nan Rows Dropped Successfully")
                else:
                    display_status(
                        "No Rows matching threshold were dropped")

            else:

                if opstat.get_status():
                    numcolsdropped = drop_nan_cols(
                        cfg.get_current_chapter_df(cfg.DataInspection_ID),
                        threshold, thresholdType, opstat)

                if not opstat.get_status():
                    display_exception(opstat)
                elif numcolsdropped > 0:
                    display_status(
                        str(numcolsdropped) +
                        " Columns with Nans Dropped Successfully")
                else:
                    display_status(
                        " No Columns matching threshold were dropped")

        elif option == dim.DISPLAY_ROW_OPTION:
            display_inspect_rows()

        elif option == dim.DISPLAY_COL_GRAPHS:
            display_inspect_graphs(parms)

        elif option == dim.DISPLAY_COL_OUTLIERS:
            display_inspect_outliers(parms[0])

        elif option == dim.DISPLAY_SCROLL_TO_DF_ROW:
            diw.display_scroll_to_row()

        elif option == dim.PROCESS_SCROLL_TO_DF_ROW:

            opstat = opStatus()

            df = cfg.get_current_chapter_df(cfg.DataInspection_ID)

            retparms = get_row_id_for_df(df, parms,
                                         diw.scroll_df_rows_input_idList,
                                         opstat)

            if opstat.get_status():
                # retparms[1] used to pick between two identical calls;
                # only the row id in retparms[0] affects the display
                display_inspect_rows(retparms[0])
            else:
                diw.display_scroll_to_row()
                display_exception(opstat)

        elif option == dim.SCROLL_DF_ROWS_DOWN:

            new_row_id = cfg.get_config_value(cfg.CURRENT_SCROLL_ROW_KEY)

            if new_row_id is None:
                new_row_id = 0
            else:
                new_row_id = new_row_id + 200

                # stay on the current row when the next page is past the end
                df = cfg.get_current_chapter_df(cfg.DataInspection_ID)
                if new_row_id > len(df):
                    new_row_id = cfg.get_config_value(
                        cfg.CURRENT_SCROLL_ROW_KEY)

            display_inspect_rows(new_row_id)

        elif option == dim.SCROLL_DF_ROWS_UP:

            new_row_id = cfg.get_config_value(cfg.CURRENT_SCROLL_ROW_KEY)

            if new_row_id is None:
                new_row_id = 0
            else:
                # page up by 200 rows, clamped at the first row
                new_row_id = max(new_row_id - 200, 0)

            display_inspect_rows(new_row_id)

        elif option == dim.DISPLAY_DF_ROW:

            print("dim.DISPLAY_DF_ROW")

        elif option == dim.DISPLAY_DF_ROW_REMOTE:

            # parms[0] names the chapter whose current df should be inspected
            chapterid = parms[0]

            new_config_df = None

            if chapterid == cfg.DataInspection_ID:
                new_config_df = cfg.get_config_value(cfg.CURRENT_INSPECTION_DF)
            elif chapterid == cfg.DataCleansing_ID:
                new_config_df = cfg.get_config_value(cfg.CURRENT_CLEANSE_DF)
            elif chapterid == cfg.DataTransform_ID:
                new_config_df = cfg.get_config_value(cfg.CURRENT_TRANSFORM_DF)
            elif chapterid == cfg.DataExport_ID:
                new_config_df = cfg.get_config_value(cfg.CURRENT_EXPORT_DF)
            elif chapterid == cfg.DataImport_ID:
                new_config_df = cfg.get_config_value(cfg.CURRENT_IMPORT_DF)
            elif chapterid == cfg.SWGeocodeUtility_ID:
                new_config_df = cfg.get_config_value(cfg.CURRENT_GEOCODE_DF)
            elif chapterid == cfg.SWDFSubsetUtility_ID:
                new_config_df = cfg.get_config_value(cfg.CURRENT_SUBSET_DF)

            cfg.set_config_value(cfg.CURRENT_INSPECTION_DF, new_config_df)

            display_inspect_rows()

    else:

        cfg.drop_config_value(cfg.CURRENT_INSPECTION_DF)

        if not (option == dim.MAIN_OPTION):
            cfg.display_no_dfs(cfg.DataInspection_ID)

    from dfcleanser.common.display_utils import display_pop_up_buffer
    display_pop_up_buffer()
def process_export_form(formid, parms, display=True):
    """
    * --------------------------------------------------------------------------
    * function : process export function
    *
    * parms :
    *   formid   -   form id (one of the dem.* export ids)
    *   parms    -   export parms; for the two census forms this is a title
    *                that gets "_df" appended to form the export df id
    *   display  -   display flag; when set the output is cleared, the export
    *                taskbar and a running clock are shown and the export
    *                parms are displayed after a successful export
    *
    * returns :
    *   an empty tuple when no dataframe is loaded, otherwise None
    * --------------------------------------------------------
    """

    from dfcleanser.common.html_widgets import define_inputs, are_owner_inputs_defined
    if not are_owner_inputs_defined(cfg.DataExport_ID):
        define_inputs(cfg.DataExport_ID, dew.dataexport_inputs)

    if not cfg.is_a_dfc_dataframe_loaded():
        print("No Dataframe Currently Loadad")
        return ()

    if formid == dem.EXPORT_DF_FROM_CENSUS:

        cfg.set_config_value(cfg.CURRENT_EXPORT_DF, parms + "_df")

        display_export_forms(0)

    elif formid == dem.EXPORT_TO_DB_FROM_CENSUS:

        cfg.set_config_value(cfg.CURRENT_EXPORT_DF, parms + "_df")

        dew.display_dc_export_forms(2, 4)

    elif formid in (dem.CSV_EXPORT, dem.EXCEL_EXPORT, dem.JSON_EXPORT,
                    dem.HTML_EXPORT, dem.CUSTOM_EXPORT):

        opstat = opStatus()
        clock = None

        if display:
            from IPython.display import clear_output
            clear_output()
            dew.display_export_main_taskbar()

            save_data_export_start()
            clock = RunningClock()
            clock.start()

        # the finally clause stops the clock even if an export raises
        try:

            if formid == dem.CSV_EXPORT:
                fparms = dew.get_csv_export_inputs(parms)
                opstat = export_pandas_csv(fparms, dew.pandas_export_csv_id,
                                           dew.pandas_export_csv_labelList)

                parmstitle = "Pandas CSV Export Parms"
                parmslist = dew.pandas_export_csv_labelList[:6]

            elif formid == dem.EXCEL_EXPORT:
                fparms = dew.get_excel_export_inputs(parms)
                opstat = export_pandas_excel(fparms,
                                             dew.pandas_export_excel_id,
                                             dew.pandas_export_excel_labelList)

                parmstitle = "Pandas Excel Export Parms"
                parmslist = dew.pandas_export_excel_labelList[:7]

            elif formid == dem.JSON_EXPORT:
                fparms = dew.get_json_export_inputs(parms)
                opstat = export_pandas_json(fparms, dew.pandas_export_json_id,
                                            dew.pandas_export_json_labelList)

                parmstitle = "Pandas JSON Export Parms"
                parmslist = dew.pandas_export_json_labelList[:6]

            elif formid == dem.HTML_EXPORT:
                fparms = dew.get_html_export_inputs(parms)
                opstat = export_pandas_html(fparms, dew.pandas_export_html_id,
                                            dew.pandas_export_html_labelList)

                parmstitle = "Pandas HTML Export Parms"
                parmslist = dew.pandas_export_html_labelList[:8]

            elif formid == dem.CUSTOM_EXPORT:
                (dispstats, opstat) = export_custom(parms)

                if dispstats:
                    parmstitle = "Custom Export Parms"
                    parmslist = dew.custom_export_labelList[:4]

            if opstat.get_status():
                if display:
                    if formid == dem.CUSTOM_EXPORT:
                        if dispstats:
                            ciparms = parms[0].replace("\n", "</br>")
                            # NOTE(review): fparms is never assigned on the
                            # custom export path, so fparms[1] raises here;
                            # the intended value is not visible in this
                            # function - confirm against
                            # display_data_export_parms before changing it
                            display_data_export_parms(parmstitle, parmslist,
                                                      [ciparms],
                                                      cfg.DataExport_ID,
                                                      fparms[1], True)

                    else:
                        display_data_export_parms(parmstitle, parmslist,
                                                  fparms, cfg.DataExport_ID,
                                                  fparms[1])

            else:
                display_exception(opstat)

        finally:
            if clock is not None:
                clock.stop()

    elif formid == dem.SQLTABLE_EXPORT:
        export_sql_table(parms)
    else:
        print("Invalid formid " + str(formid))
        return
def _drop_subset_step_columns(current_df, current_columns, columns_action, opstat):
    """
    * ---------------------------------------------------------
    * function : apply a subset step's keep/drop column list to a dataframe
    *
    * parms :
    *  current_df       - dataframe whose columns are dropped in place
    *  current_columns  - column names listed in the subset step
    *  columns_action   - "Keep" keeps only the listed columns, any other
    *                     value drops the listed columns
    *  opstat           - status object flagged when the drop fails
    *
    * returns :
    *  NA
    * --------------------------------------------------------
    """

    # an empty column list means the step leaves the columns alone
    if len(current_columns) == 0:
        return

    if columns_action == "Keep":
        drop_columns = [colname for colname in current_df.columns
                        if colname not in current_columns]
    else:
        drop_columns = [colname for colname in current_df.columns
                        if colname in current_columns]

    if len(drop_columns) > 0:
        try:
            current_df.drop(drop_columns, axis=1, inplace=True)
        except Exception:
            opstat.set_status(False)
            opstat.set_errorMsg("Unable to drop columns from subset dataframe")


def _display_missing_subset_parms():
    """
    * ---------------------------------------------------------
    * function : display an error for a subset form that returned no parms
    *
    * returns :
    *  NA
    * --------------------------------------------------------
    """

    opstat = opStatus()
    opstat.set_status(False)
    opstat.set_errorMsg("Subset input parms are not defined")
    display_exception(opstat)


def display_dfsubset_utility(optionId, parms=None):
    """
    * ---------------------------------------------------------
    * function : main subset utility control
    *
    * When no dfc dataframe is loaded the taskbar and df selection are
    * displayed, the current subset df config value is dropped and, for any
    * option but DISPLAY_MAIN, the "no dfs" notice is shown. Otherwise the
    * cell output is cleared, the subset input forms are defined if needed
    * and the function dispatches on optionId.
    *
    * parms :
    *  optionId     - function to run (one of the swsm.* option ids)
    *  parms        - parms to run function
    *
    * returns :
    *  NA
    *
    * notes :
    *  Several options exec() criteria text taken from the input forms or
    *  from saved sequences. That text runs with full privileges and can
    *  see this function's local names, so it must only come from a trusted
    *  user, and the local names in those branches should not be renamed
    *  without checking the criteria preamble/postamble in swsm.
    * --------------------------------------------------------
    """

    if not cfg.is_a_dfc_dataframe_loaded():

        swsw.get_dfsubset_main_taskbar()

        cfg.drop_config_value(cfg.CURRENT_SUBSET_DF)
        clear_sw_utility_dfsubsetdata()

        cfg.display_data_select_df(cfg.SWDFSubsetUtility_ID)

        if not (optionId == swsm.DISPLAY_MAIN):
            cfg.display_no_dfs(cfg.SWDFSubsetUtility_ID)

        return

    from IPython.display import clear_output
    clear_output()

    from dfcleanser.common.html_widgets import define_inputs, are_owner_inputs_defined
    if not are_owner_inputs_defined(cfg.SWDFSubsetUtility_ID):
        define_inputs(cfg.SWDFSubsetUtility_ID, swsw.SWUtility_subset_inputs)

    if optionId == swsm.DISPLAY_MAIN:

        swsw.get_dfsubset_main_taskbar()
        clear_sw_utility_dfsubsetdata()

        cfg.display_data_select_df(cfg.SWDFSubsetUtility_ID)
        swsm.clear_current_subset_data()

    elif optionId == swsm.DISPLAY_GET_SUBSET:

        swsm.clear_current_subset_data()

        if DEBUG_SUBSET:
            swsm.set_current_subset_sequence(swsm.dfc_subset_sequence())
            print("\ncurrent_subset_sequence\n")
            print("input_df_title", swsm.get_current_subset_sequence().get_input_df_title())
            print("get_sequence_title", swsm.get_current_subset_sequence().get_sequence_title())
            print("get_sequence_steps", swsm.get_current_subset_sequence().get_sequence_steps())
            if not (swsm.get_current_subset_sequence().get_sequence_steps() is None):
                print("get_total_sequence_steps", swsm.get_current_subset_sequence().get_total_sequence_steps())
                print("get_output_csv", swsm.get_current_subset_sequence().get_output_csv())
                print("get_output_dfc_df_title", swsm.get_current_subset_sequence().get_output_dfc_df_title())

        swsw.display_df_subset_setup()

        if DEBUG_SUBSET:
            print("DISPLAY_GET_SUBSET", parms)
            print("DISPLAY_GET_SUBSET : clear data")
            print(swsm.get_current_subset_sequence())
            print(swsm.get_current_subset_df())
            print(swsm.get_current_subset_step())
            print("new_sequence")
            swsm.dump_current_step()
            swsm.dump_current_sequence()

    elif optionId == swsm.PROCESS_GET_SUBSET:

        # a step left over from a previous pass is committed to the sequence
        current_step = swsm.get_current_subset_step()

        if current_step is not None:
            current_sequence = swsm.get_current_subset_sequence()
            current_sequence.add_step_to_sequence_steps(current_step)

        fparms = get_parms_for_input(parms, swsw.get_subset_input_idList)

        if len(fparms) == 0:

            # nothing to build a subset from; report it instead of failing
            # on unbound names
            _display_missing_subset_parms()

        else:

            df_title = fparms[0]
            df = cfg.get_dfc_dataframe_df(df_title)
            col_names = fparms[1]
            col_action = fparms[3]

            new_subset_df = drop_add_cols(col_names, col_action, df)
            new_subset_df_title = df_title

            new_subset_step = swsm.dfc_subset_step(new_subset_df_title, col_names, col_action)
            swsm.set_current_subset_step(new_subset_step)
            swsm.set_current_subset_df(new_subset_df)

            swsw.display_df_criteria(new_subset_df_title, new_subset_df)

        if DEBUG_SUBSET:
            print("\nPROCESS_GET_SUBSET\n  ", parms, "\n  ", fparms)
            swsm.dump_current_step()
            swsm.dump_current_sequence()

    elif optionId == swsm.DISPLAY_SAVED_SUBSET:

        swsw.display_saved_subset_sequences()

        if DEBUG_SUBSET:
            print("\nDISPLAY_SAVED_SUBSET", parms)

    elif optionId == swsm.PROCESS_RUN_CRITERIA:

        opstat = opStatus()

        fparms = get_parms_for_input(parms, swsw.get_subset_criteria_input_idList)

        subset_title = fparms[0]

        # default the output title to <input df title>_subset_<next step number>
        if len(subset_title) == 0:

            current_sequence = swsm.get_current_subset_sequence()
            total_steps = current_sequence.get_total_sequence_steps()
            current_step = swsm.get_current_subset_step()
            subset_title = current_step.get_input_subset_df_title() + "_subset_" + str(total_steps + 1)

        criteria = fparms[2]

        if len(criteria) > 0:

            clock = RunningClock()
            clock.start()

            try:

                final_criteria = (swsm.starting_criteria_preamble + criteria + swsm.starting_criteria_postamble)

                # SECURITY: executes user entered text - trusted input only
                exec(final_criteria)

                current_step = swsm.get_current_subset_step()
                current_step.set_criteria(criteria)
                current_step.set_output_subset_df_title(subset_title)

            except Exception as e:
                opstat.store_exception("Error running df_criteria " + criteria, e)

            finally:
                clock.stop()

        if opstat.get_status():
            swsw.display_process_subset()
        else:
            display_exception(opstat)

        if DEBUG_SUBSET:
            print("PROCESS_RUN_CRITERIA : End")
            swsm.dump_current_step()
            swsm.dump_current_sequence()

    elif optionId == swsm.DISPLAY_SAVE_SUBSET:

        fparms = get_parms_for_input(parms, swsw.get_subset_run_input_idList)

        current_sequence = swsm.get_current_subset_sequence()
        current_step = swsm.get_current_subset_step()
        current_sequence.add_step_to_sequence_steps(current_step)

        if len(fparms) == 0:

            _display_missing_subset_parms()

        else:

            df_title = fparms[0]
            df = swsm.get_current_subset_df()
            col_names = fparms[1]
            col_action = fparms[3]

            new_subset_df = drop_add_cols(col_names, col_action, df)

            swsw.display_save_subset(df_title, new_subset_df)

        if DEBUG_SUBSET:
            print("DISPLAY_SAVE_SUBSET", parms, fparms)
            swsm.dump_current_step()
            swsm.dump_current_sequence()

    elif optionId == swsm.DISPLAY_SAVE_AND_GET_SUBSET:

        fparms = get_parms_for_input(parms, swsw.get_subset_run_input_idList)

        current_sequence = swsm.get_current_subset_sequence()
        current_step = swsm.get_current_subset_step()
        current_sequence.add_step_to_sequence_steps(current_step)

        if len(fparms) == 0:

            _display_missing_subset_parms()

        else:

            df_title = fparms[0]
            col_names = fparms[1]
            col_action = fparms[3]

            new_subset_step = swsm.dfc_subset_step(df_title, col_names, col_action)
            df = swsm.get_current_subset_df()

            new_subset_df = drop_add_cols(col_names, col_action, df)
            swsm.set_current_subset_df(new_subset_df)
            swsm.set_current_subset_step(new_subset_step)

            swsw.display_df_criteria(df_title, new_subset_df)

        if DEBUG_SUBSET:
            print("PROCESS_SAVE_AND_GET_SUBSET", parms, fparms)
            swsm.dump_current_step()
            swsm.dump_current_sequence()

    elif optionId == swsm.PROCESS_SAVE_SUBSET:

        save_subset_run(parms, 0)

    elif optionId == swsm.PROCESS_SUBSET_SEQUENCE:

        opstat = opStatus()

        fparms = get_parms_for_input(parms, swsw.get_subset_sequences_input_idList)

        sequence = fparms[0]
        run_option = fparms[1]

        saved_sequence = swsm.get_subset_sequence(sequence)
        first_step = saved_sequence.get_sequence_step(0)

        df_title = first_step.get_input_subset_df_title()
        df = cfg.get_dfc_dataframe_df(df_title)

        if df is None:

            swsw.get_dfsubset_main_taskbar()
            cfg.display_data_select_df(cfg.SWDFSubsetUtility_ID)

            opstat.set_status(False)
            opstat.set_errorMsg("subset sequence starting df '" + df_title + "' is not currently loaded in dfc")
            display_exception(opstat)

        elif run_option == "Auto Run":

            total_steps = saved_sequence.get_total_sequence_steps()
            swsm.set_current_subset_df(df)

            for step_id in range(total_steps):

                current_step = saved_sequence.get_sequence_step(step_id)
                current_columns = current_step.get_col_names_list()
                columns_action = current_step.get_keep_drop_flag()
                criteria = current_step.get_criteria()
                output_df_title = current_step.get_output_subset_df_title()

                # NOTE(review): the drop is in place; unless
                # set_current_subset_df copies, this also changes the loaded
                # input dataframe - confirm
                current_df = swsm.get_current_subset_df()
                _drop_subset_step_columns(current_df, current_columns, columns_action, opstat)
                swsm.set_current_subset_df(current_df)

                if opstat.get_status():

                    try:

                        # presumably the saved criteria read current_df and
                        # the postamble stores the result back through swsm
                        # SECURITY: executes saved user text - trusted only
                        current_df = swsm.get_current_subset_df()
                        exec(criteria + swsm.starting_criteria_postamble)
                        current_df = swsm.get_current_subset_df()

                    except Exception as e:
                        opstat.store_exception("Error running subset sequence '" + sequence + "'", e)

                # later steps would run against a half processed dataframe
                if not opstat.get_status():
                    break

            if opstat.get_status():
                swsw.display_save_subset(output_df_title, swsm.get_current_subset_df(), True)
            else:
                display_exception(opstat)

        else:

            # manual run : start stepping through the sequence at step 0
            swsm.set_current_subset_df(df)

            current_step = saved_sequence.get_sequence_step(0)
            current_df_title = current_step.get_input_subset_df_title()
            current_columns = current_step.get_col_names_list()
            columns_action = current_step.get_keep_drop_flag()

            swsw.display_manual_df_subset_setup(saved_sequence.get_sequence_title(), current_df_title, current_columns, columns_action, 0)

            swsm.set_current_subset_step_id(0)

            swsm.set_current_subset_sequence(saved_sequence)

    elif optionId == swsm.PROCESS_GET_NEXT_SUBSET:

        opstat = opStatus()

        fparms = get_parms_for_input(parms, swsw.get_manual_input_idList)

        # the column list arrives as the text of a bracketed list
        collist = fparms[1]
        collist = collist.lstrip("[")
        collist = collist.rstrip("]")
        collist = collist.split(",")

        keep_drop = fparms[3]

        saved_sequence = swsm.get_current_subset_sequence()

        total_steps = saved_sequence.get_total_sequence_steps()
        current_step_id = swsm.get_current_subset_step_id()

        if current_step_id < total_steps:
            current_step = saved_sequence.get_sequence_step(current_step_id)
        else:
            # past the saved steps : the form values define a new step
            swsm.set_current_subset_step_col_names_list(collist)
            swsm.set_current_subset_keep_drop_flag(keep_drop)
            current_step = swsm.get_current_subset_step()

        if DEBUG_SUBSET:
            swsm.dump_current_step()

        swsm.set_current_subset_step(current_step)

        current_df_title = current_step.get_input_subset_df_title()

        current_df = swsm.get_current_subset_df()
        current_columns = current_step.get_col_names_list()
        columns_action = current_step.get_keep_drop_flag()
        criteria = current_step.get_criteria()
        output_df_title = current_step.get_output_subset_df_title()

        _drop_subset_step_columns(current_df, current_columns, columns_action, opstat)

        swsw.display_next_criteria(current_df_title, current_df, criteria, output_df_title)

        if not opstat.get_status():
            display_exception(opstat)

    elif optionId == swsm.PROCESS_NEXT_CRITERIA:

        opstat = opStatus()

        fparms = get_parms_for_input(parms, swsw.get_next_criteria_input_idList)

        output_df_title = fparms[0]
        criteria = fparms[2]

        # the sequence being stepped through is the current one held by swsm
        current_sequence = swsm.get_current_subset_sequence()
        sequence_title = current_sequence.get_sequence_title()

        try:

            # SECURITY: executes user entered text - trusted input only
            current_df = swsm.get_current_subset_df()
            exec(criteria + swsm.starting_criteria_postamble)
            current_df = swsm.get_current_subset_df()

        except Exception as e:

            opstat.store_exception("Error running subset sequence '" + sequence_title + "'", e)

            # redisplay the criteria form with the step's stored values
            current_step = swsm.get_current_subset_step()
            current_df_title = current_step.get_input_subset_df_title()
            current_df = swsm.get_current_subset_df()
            criteria = current_step.get_criteria()
            output_df_title = current_step.get_output_subset_df_title()

            swsw.display_next_criteria(current_df_title, current_df, criteria, output_df_title)

            display_exception(opstat)

        if opstat.get_status():

            swsm.set_current_subset_df(current_df)
            swsm.set_current_subset_step_id(swsm.get_current_subset_step_id() + 1)

            next_step_id = swsm.get_current_subset_step_id()
            active_sequence = swsm.get_current_subset_sequence()

            if next_step_id >= active_sequence.get_total_sequence_steps():

                swsw.display_sequence_save_subset(output_df_title, swsm.get_current_subset_df())

            else:

                current_step = active_sequence.get_sequence_step(next_step_id)
                current_df_title = current_step.get_input_subset_df_title()
                current_columns = current_step.get_col_names_list()
                columns_action = current_step.get_keep_drop_flag()

                swsw.display_manual_df_subset_setup(active_sequence.get_sequence_title(), current_df_title, current_columns, columns_action, next_step_id)

    elif optionId == swsm.DISPLAY_NEW_STEP:

        current_sequence = swsm.get_current_subset_sequence()
        sequence_title = current_sequence.get_sequence_title()

        # the new step takes the previous step's output df as its input
        current_step = swsm.get_current_subset_step()
        df_title = current_step.get_output_subset_df_title()
        current_columns = []
        current_action = "Keep"
        criteria = swsm.starting_criteria
        output_df_title = ""

        current_step = swsm.dfc_subset_step(df_title, current_columns, current_action, criteria, output_df_title)
        swsm.set_current_subset_step(current_step)

        swsw.display_manual_df_subset_setup(sequence_title, df_title, current_columns, current_action, swsm.get_current_subset_step_id())

    elif optionId == swsm.PROCESS_SAVE_SAVED_SUBSET:

        save_subset_run(parms, 1)

    elif optionId == swsm.DISPLAY_GET_REMOTE_SUBSET:

        # parms[0] names the chapter whose current df should be subset
        chapterid = parms[0]

        new_config_df = None

        if chapterid == cfg.DataInspection_ID:
            new_config_df = cfg.get_config_value(cfg.CURRENT_INSPECTION_DF)
        elif chapterid == cfg.DataCleansing_ID:
            new_config_df = cfg.get_config_value(cfg.CURRENT_CLEANSE_DF)
        elif chapterid == cfg.DataTransform_ID:
            new_config_df = cfg.get_config_value(cfg.CURRENT_TRANSFORM_DF)
        elif chapterid == cfg.DataExport_ID:
            new_config_df = cfg.get_config_value(cfg.CURRENT_EXPORT_DF)
        elif chapterid == cfg.DataImport_ID:
            new_config_df = cfg.get_config_value(cfg.CURRENT_IMPORT_DF)
        elif chapterid == cfg.SWGeocodeUtility_ID:
            new_config_df = cfg.get_config_value(cfg.CURRENT_GEOCODE_DF)
        elif chapterid == cfg.SWDFSubsetUtility_ID:
            new_config_df = cfg.get_config_value(cfg.CURRENT_SUBSET_DF)

        cfg.set_config_value(cfg.CURRENT_SUBSET_DF, new_config_df)

        swsm.clear_current_subset_data()
        swsw.display_df_subset_setup()
Example #5
0
def get_df_list_html(title):
    """
    * -------------------------------------------------------------------------- 
    * function : build the html for the dataframe manager input form
    * 
    * parms :
    *   title   -   title of the dfc dataframe to show in the form, or None
    *               to show the first entry of the dfc dataframes titles list
    *
    * returns : 
    *   html string produced by the input form
    * --------------------------------------------------------
    """

    # form fields : [title, number of rows, number of columns, notes]
    # left blank when no dataframe is loaded or the title is not found
    form_values = ["", "", "", ""]

    if cfg.is_a_dfc_dataframe_loaded():

        if title is None:
            shown_title = cfg.get_dfc_dataframes_titles_list()[0]
            df_found = True
        else:
            shown_title = title
            df_found = cfg.get_dfc_dataframe(title) is not None

        if df_found:
            row_total = str(len(cfg.get_dfc_dataframe_df(shown_title)))
            column_total = str(
                len(cfg.get_dfc_dataframe_df(shown_title).columns))
            form_values = [
                shown_title,
                row_total,
                column_total,
                cfg.get_dfc_dataframe_notes(shown_title),
            ]

    # store the field values and the per-field protect flags for the form
    cfg.set_config_value(dfmgr_input_id + "Parms", form_values)
    cfg.set_config_value(dfmgr_input_id + "ParmsProtect",
                         [False, True, True, False])

    df_form = InputForm(dfmgr_input_id, dfmgr_input_idList,
                        dfmgr_input_labelList, dfmgr_input_typeList,
                        dfmgr_input_placeholderList, dfmgr_input_jsList,
                        dfmgr_input_reqList)

    all_titles = cfg.get_dfc_dataframes_titles_list()

    # select list of dataframe titles - a single blank entry when none loaded
    if cfg.is_a_dfc_dataframe_loaded():
        title_select = {
            "default": str(form_values[0]),
            "list": all_titles,
            "callback": "select_new_df",
        }
    else:
        title_select = {"default": " ", "list": [" "]}

    get_select_defaults(df_form, dfmgr_input_id, dfmgr_input_idList,
                        dfmgr_input_typeList, [title_select])

    df_form.set_shortForm(True)
    df_form.set_gridwidth(480)
    df_form.set_custombwidth(90)
    df_form.set_fullparms(True)

    return df_form.get_html()
def display_dc_export_forms(exid, detid=0, notes=False):
    """
    * -------------------------------------------------------------------------- 
    * function : display pandas export input forms
    * 
    * parms :
    *   exid    -   export type (one of the dem.EXPORT_* options)
    *   detid   -   detail id - only used for dem.EXPORT_PANDAS_TB_PLUS_DETAILS
    *   notes   -   notes flag - when truthy the custom export help notes
    *               are shown above the custom export form
    *
    * returns : N/A
    *
    * notes :
    *   the cell output is always cleared first. an exid that matches none
    *   of the handled options displays nothing further.
    * --------------------------------------------------------
    """

    clear_output()

    # main export task bar only
    if exid == dem.EXPORT_TB_ONLY:
        display_export_main_taskbar()
        cfg.display_data_select_df(cfg.DataExport_ID)
        return

    # every other option needs a loaded dataframe
    if not cfg.is_a_dfc_dataframe_loaded():
        display_export_main_taskbar()
        cfg.display_data_select_df(cfg.DataExport_ID)
        cfg.display_no_dfs(cfg.DataExport_ID)
        return

    if exid == dem.EXPORT_PANDAS_TB_PLUS_DETAILS:

        if detid == dem.SQLTABLE_EXPORT:
            _display_sqltable_export_connector()
        else:
            _display_pandas_export_details(detid)

    elif exid == dem.EXPORT_PANDAS_TB_ONLY:
        display_pandas_export_taskbar()

    elif exid == dem.EXPORT_CUSTOM_ONLY:
        _display_custom_export_form(notes)


def _get_export_dbcon_parms():
    """
    * -------------------------------------------------------------------------- 
    * function : get the connector parms for the current export db id
    * 
    * parms : N/A
    *
    * returns : 
    *   the stored connector parms for the current db id, or the default
    *   parms list for that db id when nothing is stored
    *
    * notes :
    *   when no db id is stored, or the stored db id is not one of the
    *   known ids, the current db id is reset to dbutils.MySql so a parms
    *   list is always returned
    * --------------------------------------------------------
    """

    import dfcleanser.common.db_utils as dbutils

    # (db id, config key of the stored parms, defaults when nothing stored)
    connectors = (
        (dbutils.MySql, dbutils.MYSQL_DBCON_PARMS,
         ["", "", "", "", dbutils.pymysql_library]),
        (dbutils.MS_SQL_Server, dbutils.MSSQL_DBCON_PARMS,
         ["", "", "", "", dbutils.pyodbc_library]),
        (dbutils.SQLite, dbutils.SQLITE_DBCON_PARMS,
         ["", dbutils.sqlite3_library]),
        (dbutils.Postgresql, dbutils.POSTGRESQL_DBCON_PARMS,
         ["", "", "", "", dbutils.psycopg2_library]),
        (dbutils.Oracle, dbutils.ORACLE_DBCON_PARMS,
         ["", "", "", dbutils.cx_oracle_library]),
        (dbutils.Custom, dbutils.CUSTOM_DBCON_PARMS, [""]),
    )

    dbid = cfg.get_config_value(cfg.CURRENT_DB_ID_KEY)

    if dbid is not None:
        for known_id, parms_key, default_parms in connectors:
            if dbid == known_id:
                stored_parms = cfg.get_config_value(parms_key)
                if stored_parms is None:
                    return default_parms
                return stored_parms

    # no db id stored (or an unrecognized one) : default to MySql
    cfg.set_config_value(cfg.CURRENT_DB_ID_KEY, dbutils.MySql)
    stored_parms = cfg.get_config_value(dbutils.MYSQL_DBCON_PARMS)
    if stored_parms is None:
        return ["", "", "", "", dbutils.pymysql_library]
    return stored_parms


def _display_sqltable_export_connector():
    """
    * -------------------------------------------------------------------------- 
    * function : display the db connector inputs for a sql table export
    * 
    * parms : N/A
    *
    * returns : N/A
    * --------------------------------------------------------
    """

    import dfcleanser.common.db_utils as dbutils

    # drop any previously stored sql table export form values
    cfg.drop_config_value(pandas_export_sqltable_id + "Parms")

    conparms = _get_export_dbcon_parms()

    # re-read the db id since it may just have been defaulted
    dbutils.display_db_connector_inputs(
        cfg.get_config_value(cfg.CURRENT_DB_ID_KEY), conparms,
        dbutils.SQL_EXPORT)


def _size_export_form(export_form):
    """
    * -------------------------------------------------------------------------- 
    * function : set the export form dimensions for the current dfc mode
    * 
    * parms :
    *   export_form -   input form to size
    *
    * returns : N/A
    * --------------------------------------------------------
    """

    if cfg.get_dfc_mode() == cfg.INLINE_MODE:
        export_form.set_shortForm(True)
        export_form.set_gridwidth(640)
        export_form.set_custombwidth(110)
    else:
        export_form.set_gridwidth(480)
        export_form.set_custombwidth(100)


def _display_export_grid(heading_html, form_html):
    """
    * -------------------------------------------------------------------------- 
    * function : display a heading and an export form in a generic grid
    * 
    * parms :
    *   heading_html    -   html for the grid header
    *   form_html       -   html for the export form
    *
    * returns : N/A
    * --------------------------------------------------------
    """

    gridclasses = ["dfcleanser-common-grid-header", "dfc-footer"]
    gridhtmls = [heading_html, form_html]

    # the export forms are displayed with the "data-import" wrapper names
    if cfg.get_dfc_mode() == cfg.INLINE_MODE:
        wrapper = "data-import-wrapper"
    else:
        wrapper = "data-import-pop-up-wrapper"

    display_generic_grid(wrapper, gridclasses, gridhtmls)


def _display_pandas_export_details(detid):
    """
    * -------------------------------------------------------------------------- 
    * function : display the main export taskbar and a pandas export form
    * 
    * parms :
    *   detid   -   detail id of the pandas export form to display
    *
    * returns : N/A
    * --------------------------------------------------------
    """

    display_export_main_taskbar()

    pandas_export_form = get_pandas_export_input_form(detid)
    _size_export_form(pandas_export_form)

    pandas_input_html = pandas_export_form.get_html()
    pandas_input_heading_html = ("<div>" +
                                 get_pandas_export_input_title(detid) +
                                 "</div>")

    _display_export_grid(pandas_input_heading_html, pandas_input_html)


def _display_custom_export_form(notes):
    """
    * -------------------------------------------------------------------------- 
    * function : display the custom export form
    * 
    * parms :
    *   notes   -   notes flag - when truthy the help notes are displayed
    *
    * returns : N/A
    * --------------------------------------------------------
    """

    # add the custom export form
    exportCustomDetailsForm = InputForm(
        custom_export_id, custom_export_idList, custom_export_labelList,
        custom_export_typeList, custom_export_placeholderList,
        custom_export_jsList, custom_export_reqList)

    if notes:
        customNotes = [
            "To create custom export code in the code cell below hit 'New Custom Export'",
            "&nbsp;&nbsp;&nbsp;&nbsp;(enter and test export in the code cell below)",
            "&nbsp;&nbsp;&nbsp;&nbsp;(leave the '# custom export' comment line in the code cell",
            "&nbsp;&nbsp;&nbsp;&nbsp;(call dfcleanser.common.cfg.get_dfc_dataframe_df() to get the current dataframe)",
            "To run the export code in the Custom Export Code box hit 'Run Custom Export' button",
            "&nbsp;&nbsp;&nbsp;&nbsp;(only the code in the Custom Export Code box is run and stored for scripting)",
            "Once import successful hit 'Save Custom Import' button to store import code for future retrieval",
            "To drop the custom export code and clear the Custom Export Code box hit 'Drop Custom Export' button"
        ]

        print("\n")
        display_inline_help(customNotes, 92)

    # select lists : dataframe to export and a True/False flag
    selectDicts = [
        cfg.get_dfc_dataframes_select_list(cfg.DataExport_ID),
        {"default": "False", "list": ["True", "False"]},
    ]

    get_select_defaults(exportCustomDetailsForm, custom_export_id,
                        custom_export_idList, custom_export_typeList,
                        selectDicts)

    _size_export_form(exportCustomDetailsForm)
    exportCustomDetailsForm.set_fullparms(True)

    # starter code for the custom export code box. the import must name
    # get_dfc_dataframe_df since that is the function the template calls
    from dfcleanser.common.html_widgets import new_line
    custom_code = ("# add USER CODE to export the df" + new_line + new_line +
                   "from dfcleanser.common.cfg import get_dfc_dataframe_df" +
                   new_line +
                   "df = get_dfc_dataframe_df(dataframe_title)" + new_line +
                   new_line + "# USER CODE" + new_line)

    cfg.set_config_value(custom_export_id + "Parms", ["", custom_code, ""])

    custom_export_html = exportCustomDetailsForm.get_html()
    custom_export_heading_html = "<div>Custom Export</div><br>"

    _display_export_grid(custom_export_heading_html, custom_export_html)