예제 #1
0
def upperCase_df_column(dftitle, dfcolname):
    """
    * ------------------------------------------------------------------------
    * function : convert string column to upper case
    * 
    * parms :
    *  dftitle     - dataframe title
    *  dfcolname   - dataframe column to upper case
    *
    * returns : 
    *    Successful : upper cased columns list  
    *    Error : opstat
    *
    * Notes : 
    *    dfcleanser generic function
    * -------------------------------------------------------------------------
    """

    opstat = opStatus()
    df = cfg.get_dfc_dataframe_df(dftitle)

    try:

        # materialize eagerly: a lazy map() object would defer any
        # .upper() failure (e.g. non-string cell) to the caller's first
        # iteration, bypassing this try/except entirely
        new_col_values = [x.upper() for x in df[dfcolname]]
        return (new_col_values)

    except Exception as e:
        opstat.store_exception(
            "'upperCase_df_column' error : " + dftitle + " " + dfcolname, e)
        return (opstat)
예제 #2
0
def normalize_df_column(dftitle, dfcolname):
    """
    * ------------------------------------------------------------------------
    * function : normalize a dataframe column (min-max scaling to [0, 1])
    * 
    * parms :
    *  dftitle     - dataframe title
    *  dfcolname   - dataframe column to normalize
    *
    * returns : 
    *    Successful : normalized column list  
    *    Error : opstat
    *
    * Notes : 
    *    dfcleanser generic function
    * -------------------------------------------------------------------------
    """

    opstat = opStatus()

    df = cfg.get_dfc_dataframe_df(dftitle)

    from sklearn.preprocessing import MinMaxScaler

    try:

        scaler = MinMaxScaler()
        # fit_transform requires a 2-D input; df[[dfcolname]] yields a
        # single-column DataFrame (a 1-D Series would raise ValueError)
        scaled_values = scaler.fit_transform(df[[dfcolname]])
        return (scaled_values)

    except Exception as e:
        opstat.store_exception(
            "'normalize_df_column' error : " + dftitle + " " + dfcolname, e)
        return (opstat)
예제 #3
0
def absolute_df_column(dftitle, dfcolname):
    """
    * ------------------------------------------------------------------------
    * function : convert dataframe column to absolute value
    * 
    * parms :
    *  dftitle       - dataframe title
    *  dfcolname     - dataframe column to take absolute values of
    *
    * returns : 
    *    Successful : col list of abs values  
    *    Error : opstat
    *
    * Notes : 
    *    dfcleanser generic function
    * -------------------------------------------------------------------------
    """

    opstat = opStatus()
    df = cfg.get_dfc_dataframe_df(dftitle)

    import numpy as np

    try:

        # NOTE: np.array() with no arguments raises TypeError, so the old
        # pre-initialization is removed - np.absolute is the only step needed
        colabsolutes = np.absolute(df[dfcolname])
        return (colabsolutes)

    except Exception as e:
        opstat.store_exception(
            "'absolute_df_column' error : " + dftitle + " " + dfcolname, e)
        return (opstat)
예제 #4
0
def drop_working_df():
    """Drop the dfc subset working dataframe if one is currently loaded."""

    from dfcleanser.common.cfg import get_dfc_dataframe_df, drop_dfc_dataframe

    working_df = get_dfc_dataframe_df("dfc_subset_working_df")

    if working_df is not None:
        drop_dfc_dataframe("dfc_subset_working_df")
예제 #5
0
def get_df_geocode_center(dftitle, dfcolname):
    """
    * ------------------------------------------------------------------------
    * function : get the center point of a dataframe locations column
    * 
    * parms :
    *  dftitle       - dataframe name
    *  dfcolname     - list of dataframe column names to use for locations
    *                  (one combined [lat, long] column, or separate
    *                  lat and long columns)
    *
    * returns : 
    *    center point if no exception
    *    opStatus object if exception
    *
    * Notes : 
    *    dfcleanser generic function
    * -------------------------------------------------------------------------
    """

    opstat = opStatus()

    import json

    geocoords = []

    df = cfg.get_dfc_dataframe_df(dftitle)

    # single column holding combined [lat, long] values
    # (bug fix: original tested len(dfcolname == 1), which compares the
    # list itself to 1 and then raises TypeError on len() of a bool)
    if (len(dfcolname) == 1):
        geocoords = df[dfcolname[0]].tolist()

        # NOTE(review): json.dumps converts the list INTO a JSON string;
        # json.loads on each string element may have been intended -
        # preserved as-is, confirm against get_geocode_center
        if (type(geocoords[0]) == str):
            geocoords = json.dumps(geocoords)

    elif (len(dfcolname) == 2):

        geolats = df[dfcolname[0]].tolist()
        if (type(geolats[0]) == str):
            geolats = json.dumps(geolats)

        geolongs = df[dfcolname[1]].tolist()
        if (type(geolongs[0]) == str):
            geolongs = json.dumps(geolongs)

        # pair up lats and longs into [lat, long] coordinate points
        for i in range(len(geolats)):
            geocoords.append([geolats[i], geolongs[i]])

    else:

        opstat.set_status(False)
        opstat.set_errorMsg(
            "get_df_geocode_center Error : column names list is invalid")

    if (opstat.get_status()):
        return (get_geocode_center(geocoords, opstat))
    else:
        return (opstat)
예제 #6
0
def get_trig_values_for_column(dftitle, dfcolname, trigfunc):
    """
    * ------------------------------------------------------------------------
    * function : get trig column values
    * 
    * parms :
    *  dftitle       - dataframe title
    *  dfcolname     - dataframe column to apply trig function to
    *  trigfunc      - trig function to apply 
    *                    ('sin','cos','tan','arcsin','arccos','arctan')
    *
    * returns : 
    *    Successful : col list of trig values (None if trigfunc unknown)
    *    Error : opstat
    *
    * Notes : 
    *    dfcleanser generic function
    * -------------------------------------------------------------------------
    """

    opstat = opStatus()
    df = cfg.get_dfc_dataframe_df(dftitle)

    try:

        import numpy as np

        # NOTE: the old trigcol = np.array() pre-init raised TypeError,
        # which made this function unconditionally return opstat;
        # a dispatch table replaces both the bad init and the if/elif chain
        trig_dispatch = {
            'sin': np.sin,
            'cos': np.cos,
            'tan': np.tan,
            'arcsin': np.arcsin,
            'arccos': np.arccos,
            'arctan': np.arctan,
        }

        func = trig_dispatch.get(trigfunc)
        if func is None:
            # unknown trig function name - preserve original None return
            return (None)

        return (func(df[dfcolname]))

    except Exception as e:
        opstat.store_exception(
            "'get_trig_values_for_column' error : " + dftitle + " " +
            dfcolname + " " + trigfunc, e)
        return (opstat)
예제 #7
0
def random_float_range(dftitle, randomFloatLower, randomFloatUpper):
    """
    * ------------------------------------------------------------------------
    * function : generate column of random floats in a range
    * 
    * parms :
    *  dftitle            - dataframe title
    *  randomFloatLower   - random float lower range value
    *  randomFloatUpper   - random float upper range value
    *
    * returns : 
    *    Successful : list of random floats, one per dataframe row  
    *    Error : opstat
    *
    * Notes : 
    *    dfcleanser generic function
    * -------------------------------------------------------------------------
    """

    opstat = opStatus()
    df = cfg.get_dfc_dataframe_df(dftitle)

    import random

    # plain list: the old np.array() pre-init raised TypeError outside the
    # try block, and ndarrays have no .append() anyway
    colrandfloats = []

    try:

        lower = float(randomFloatLower)
        upper = float(randomFloatUpper)

        # random.uniform handles float ranges; random.randrange (used
        # previously) only accepts integer arguments and always raised
        for _ in range(len(df)):
            colrandfloats.append(random.uniform(lower, upper))

        return (colrandfloats)

    except Exception as e:
        opstat.store_exception(
            "'random_float_range' error : " + dftitle + " " +
            str(randomFloatLower) + " " + str(randomFloatUpper), e)
        return (opstat)
예제 #8
0
def round_df_column(dftitle, dfcolname, decimals):
    """
    * ------------------------------------------------------------------------
    * function : round float column to decimals precision
    * 
    * parms :
    *  dftitle       - dataframe title
    *  dfcolname     - dataframe column to round
    *  decimals      - rounding precision
    *                   0 - round to int
    *
    * returns : 
    *    Successful : rounded col vals list  
    *    Error : opstat
    *
    * Notes : 
    *    dfcleanser generic function
    * -------------------------------------------------------------------------
    """

    opstat = opStatus()
    df = cfg.get_dfc_dataframe_df(dftitle)

    import numpy as np

    try:

        # NOTE: removed np.array() pre-init (raised TypeError, and it sat
        # outside the try so the error was never caught)
        if (decimals == 0):
            dfrounds = np.rint(df[dfcolname])
        else:
            # bug fix: original called np.round_(df[dfcolname, decimals]),
            # indexing the frame with a tuple (KeyError) instead of
            # passing decimals as the rounding precision
            dfrounds = np.round(df[dfcolname], decimals)

        return (dfrounds)

    except Exception as e:
        opstat.store_exception(
            "'round_df_column' error : " + dftitle + " " + dfcolname + " " +
            str(decimals), e)
        return (opstat)
예제 #9
0
def convert_df_column_to_degrees_or_radians(dftitle, dfcolname, degrees):
    """
    * ------------------------------------------------------------------------
    * function : convert dataframe column to degrees or radians
    * 
    * parms :
    *  dftitle       - dataframe title
    *  dfcolname     - dataframe column to convert
    *  degrees       - True  - convert to degrees
    *                  False - convert to radians
    *
    * returns : 
    *    Successful : converted column values list  
    *    Error : opstat
    *
    * Notes : 
    *    dfcleanser generic function
    * -------------------------------------------------------------------------
    """

    opstat = opStatus()
    df = cfg.get_dfc_dataframe_df(dftitle)

    import numpy as np

    try:

        # NOTE: removed colvalues = np.array() pre-init; calling np.array()
        # with no argument raises TypeError, and it sat outside the try so
        # the function crashed before doing any work
        if (degrees):
            colvalues = np.degrees(df[dfcolname])
        else:
            colvalues = np.radians(df[dfcolname])

        return (colvalues)

    except Exception as e:
        opstat.store_exception(
            "'convert_df_column_to_degrees_or_radians' error : " + dftitle +
            " " + dfcolname + " " + str(degrees), e)
        return (opstat)
def display_df_subset_setup():
    """
    * -------------------------------------------------------------------------- 
    * function : display current df subset form
    * 
    * parms : N/A (current subset df title is read from
    *          cfg.CURRENT_SUBSET_DF config value)
    *
    * returns : N/A
    *
    * Notes : renders a three-part grid - column stats table, heading,
    *         and the subset input form
    * --------------------------------------------------------
    """
    # resolve the dataframe currently selected for subsetting
    df_title = cfg.get_config_value(cfg.CURRENT_SUBSET_DF)
    df = cfg.get_dfc_dataframe_df(df_title)

    col_stats_table = get_column_stats_table(df_title, df)

    # build the subset input form from the module-level id/label/type lists
    from dfcleanser.common.html_widgets import InputForm
    subset_input_form = InputForm(get_subset_input_id, get_subset_input_idList,
                                  get_subset_input_labelList,
                                  get_subset_input_typeList,
                                  get_subset_input_placeholderList,
                                  get_subset_input_jsList,
                                  get_subset_input_reqList)

    # select widgets: dataframes list, column names list, keep/drop choice
    selectDicts = []

    dataframes = cfg.get_dfc_dataframes_select_list(cfg.SWDFSubsetUtility_ID)
    selectDicts.append(dataframes)

    # column-name select defaults to a blank entry followed by the
    # current chapter dataframe's columns
    current_df = cfg.get_current_chapter_df(cfg.SWDFSubsetUtility_ID)
    colnames = current_df.columns.tolist()
    cols_name_list = [" "]
    for i in range(len(colnames)):
        cols_name_list.append(colnames[i])

    cnames = {
        "default": cols_name_list[0],
        "list": cols_name_list,
        "callback": "change_subset_cols"
    }
    selectDicts.append(cnames)

    subssel = {"default": "Keep", "list": ["Keep", "Drop"]}
    selectDicts.append(subssel)

    get_select_defaults(subset_input_form, get_subset_input_form[0],
                        get_subset_input_form[1], get_subset_input_form[3],
                        selectDicts)

    # form layout parameters
    subset_input_form.set_shortForm(False)
    subset_input_form.set_gridwidth(680)
    subset_input_form.set_custombwidth(140)
    subset_input_form.set_fullparms(True)

    get_subset_input_html = subset_input_form.get_html()

    get_subset_heading_html = "<div>Get Dataframe Subset</div><br></br>"

    # assemble and render the three-section display grid
    gridclasses = ["dfc-top", "dfcleanser-common-grid-header", "dfc-bottom"]
    gridhtmls = [
        col_stats_table, get_subset_heading_html, get_subset_input_html
    ]

    print("\n")
    display_generic_grid("sw-utils-subset-wrapper", gridclasses, gridhtmls)
def export_pandas_sqltable(sqltableparms, dbcondict, exportid, display=True):
    """
    * -------------------------------------------------------------------------- 
    * function : export pandas dataframe into sql table 
    * 
    * parms :
    *   sqltableparms    -   export parms
    *                         [0] dataframe title, [1] table name,
    *                         [2]-[7] to_sql keyword values,
    *                         [8] optional json dict of additional parms
    *   dbcondict        -   db connector dict (None - use stored parms for
    *                         the current db id)
    *   exportid         -   export id (used as config-key prefix)
    *   display          -   display flag (True - add call to script)
    *
    * returns : (export_notes, opstat)
    * --------------------------------------------------------
    """

    opstat = opStatus()

    import dfcleanser.common.db_utils as dbu
    dbcon = dbu.dbConnector()

    # resolve connection parms: either from the supplied dict or from the
    # stored parms for the currently-selected db id
    from dfcleanser.common.db_utils import grab_connection_parms
    if (dbcondict == None):
        parmslist = get_stored_con_Parms(
            cfg.get_config_value(cfg.CURRENT_DB_ID_KEY))
        dbcondict = set_dbcon_dict(cfg.get_config_value(cfg.CURRENT_DB_ID_KEY),
                                   parmslist)
        dbconparms = grab_connection_parms(dbcondict)
    else:
        dbconparms = grab_connection_parms(dbcondict)

    dbcon.set_ConnectionParms(dbconparms)

    dbconnector = dbcon.connect_to_db(dbu.SQLALCHEMY, opstat)

    if (opstat.get_status()):

        # validate the export parms before attempting the export
        if (len(sqltableparms) == 0):
            opstat.set_status(False)
            opstat.set_errorMsg("No Export parameters defined")

        else:

            if (sqltableparms[0] == ""):
                opstat.set_status(False)
                opstat.set_errorMsg("No dataframe selected to export")

            else:

                if (sqltableparms[1] == ""):
                    opstat.set_status(False)
                    opstat.set_errorMsg("No table selected to export to")

                else:

                    df = cfg.get_dfc_dataframe_df(sqltableparms[0])

                    labellist = dew.pandas_export_sqltable_labelList

                    try:

                        # map parms [2]..[7] onto the to_sql keyword names
                        sqlkeys = [
                            labellist[2], labellist[3], labellist[4],
                            labellist[5], labellist[6], labellist[7]
                        ]
                        sqlvals = [
                            sqltableparms[2], sqltableparms[3],
                            sqltableparms[4], sqltableparms[5],
                            sqltableparms[6], sqltableparms[7]
                        ]
                        sqltypes = [
                            STRING_PARM, STRING_PARM, BOOLEAN_PARM,
                            STRING_PARM, INT_PARM, DICT_PARM
                        ]

                        sqlparms = {}
                        sqladdlparms = {}

                    except Exception as e:
                        opstat.set_status(False)
                        opstat.store_exception("Error parsing Export parms", e)

                    if (opstat.get_status()):

                        try:

                            sqlparms = get_function_parms(
                                sqlkeys, sqlvals, sqltypes)
                            # parms[8] optionally carries extra to_sql kwargs
                            # as a json dict - merge them in
                            if (not (sqltableparms[8] == "")):
                                sqladdlparms = json.loads(sqltableparms[8])

                            if (len(sqladdlparms) > 0):
                                addlparmskeys = sqladdlparms.keys()
                                for i in range(len(addlparmskeys)):
                                    sqlparms.update({
                                        addlparmskeys[i]:
                                        sqladdlparms.get(addlparmskeys[i])
                                    })

                        except Exception as e:
                            opstat.set_status(False)
                            opstat.store_exception(
                                "Error parsing Export additional parms", e)

                        if (opstat.get_status()):

                            try:

                                df.to_sql(sqltableparms[1], dbconnector,
                                          **sqlparms)

                            except Exception as e:
                                opstat.store_exception(
                                    "Unable to export to sql table", e)

    export_notes = ""

    if (opstat.get_status()):

        if (display):
            #make scriptable
            # bug fix: generated script line previously read
            # "... data_export_control export export_pandas_sqltable",
            # an invalid import statement
            add_to_script([
                "# Export SQL Table ",
                "from dfcleanser.data_export.data_export_control import export_pandas_sqltable",
                "export_pandas_sqltable(" + json.dumps(sqltableparms) + "," +
                json.dumps(dbcondict) + "," + str(exportid) + ",False)"
            ], opstat)

        export_notes = dbu.get_SQLAlchemy_connector_string(dbconparms)

        # remember the parms and exported dataframe name for later reuse
        if (len(sqltableparms) > 0):
            cfg.set_config_value(exportid + "Parms", sqltableparms)
            cfg.set_config_value(cfg.CURRENT_EXPORTED_FILE_NAME_KEY,
                                 sqltableparms[0], True)

    return (export_notes, opstat)
def export_pandas_html(fparms, exportId, labellist, display=True):
    """
    * -------------------------------------------------------------------------- 
    * function : pandas html export 
    * 
    * parms :
    *   fparms        -   export parms
    *                      [0] dataframe title, [1] output file name,
    *                      [2]-[5] to_html keyword values,
    *                      [6] optional json dict of additional parms
    *   exportId      -   export id (used as config-key prefix)
    *   labellist     -   parm label list
    *   display       -   display flag (True - add call to script)
    *
    * returns : opstat
    * --------------------------------------------------------
    """

    opstat = opStatus()

    if (len(fparms) == 0):
        opstat.set_status(False)
        opstat.set_errorMsg("No Export parameters defined")
    else:
        try:

            # map parms [2]..[5] onto the to_html keyword names
            htmlkeys = [labellist[2], labellist[3], labellist[4], labellist[5]]
            htmlvals = [fparms[2], fparms[3], fparms[4], fparms[5]]
            htmltypes = [INT_PARM, BOOLEAN_PARM, BOOLEAN_PARM, STRING_PARM]

            htmlparms = {}
            htmladdlparms = {}

        except Exception as e:
            # bug fix: message previously said "import parms" in this
            # export path
            opstat.store_exception("Error parsing export parms", e)

    if (opstat.get_status()):

        try:

            htmlparms = get_function_parms(htmlkeys, htmlvals, htmltypes)
            # fparms[6] optionally carries extra to_html kwargs as a json
            # dict - merge them in
            if (not (fparms[6] == "")):
                htmladdlparms = json.loads(fparms[6])

            if (len(htmladdlparms) > 0):
                addlparmskeys = htmladdlparms.keys()
                for i in range(len(addlparmskeys)):
                    htmlparms.update({
                        addlparmskeys[i]:
                        htmladdlparms.get(addlparmskeys[i])
                    })

        except Exception as e:
            opstat.store_exception("Unable to get additional parms", e)

    if (opstat.get_status()):

        if (fparms[0] == ""):
            opstat.set_status(False)
            opstat.set_errorMsg("No dataframe selected")
        else:
            df = cfg.get_dfc_dataframe_df(fparms[0])

            try:
                if (len(htmlparms) > 0):
                    df.to_html(fparms[1], **htmlparms)
                else:
                    df.to_html(fparms[1])

            except Exception as e:
                opstat.store_exception(
                    "Unable to export html file" + fparms[0], e)

    if (opstat.get_status()):

        if (display):
            #make scriptable
            # bug fix: generated script line was missing the leading
            # "from ", producing an invalid statement
            script = [
                "# Export HTML File ",
                "from dfcleanser.data_export.data_export_control import export_pandas_html",
                "export_pandas_html(" + json.dumps(fparms) + "," +
                str(exportId) + "," + json.dumps(labellist) + ",False)"
            ]

            add_to_script(script, opstat)

        # remember the parms and exported dataframe name for later reuse
        if (len(fparms) > 0):
            cfg.set_config_value(exportId + "Parms", fparms)
            cfg.set_config_value(cfg.CURRENT_EXPORTED_FILE_NAME_KEY, fparms[0],
                                 True)

    return (opstat)
def display_dfsubset_utility(optionId,parms=None) :
    """
    * ---------------------------------------------------------
    * function : main subset utility control
    * 
    * parms :
    *  optionId     - function to run
    *  parms        - parms to ryn function
    *
    * returns : 
    *  NA
    * --------------------------------------------------------
    """
    
    if(cfg.is_a_dfc_dataframe_loaded()) :
        
        from IPython.display import clear_output
        clear_output()
        
        from dfcleanser.common.html_widgets import define_inputs, are_owner_inputs_defined
        if(not (are_owner_inputs_defined(cfg.SWDFSubsetUtility_ID)) ) :
            define_inputs(cfg.SWDFSubsetUtility_ID,swsw.SWUtility_subset_inputs)
    
        if(optionId == swsm.DISPLAY_MAIN) :
            
            swsw.get_dfsubset_main_taskbar()
            clear_sw_utility_dfsubsetdata()
            
            cfg.display_data_select_df(cfg.SWDFSubsetUtility_ID)
            swsm.clear_current_subset_data() 
            
        elif(optionId == swsm.DISPLAY_GET_SUBSET) :
            
            swsm.clear_current_subset_data()
            
            if(DEBUG_SUBSET) :
                swsm.set_current_subset_sequence(swsm.dfc_subset_sequence())
                print("\ncurrent_subset_sequence\n")
                print("input_df_title",swsm.get_current_subset_sequence().get_input_df_title()) 
                print("get_sequence_title",swsm.get_current_subset_sequence().get_sequence_title()) 
                print("get_sequence_steps",swsm.get_current_subset_sequence().get_sequence_steps()) 
                if(not (swsm.get_current_subset_sequence().get_sequence_steps() is None)) :
                    print("get_total_sequence_steps",swsm.get_current_subset_sequence().get_total_sequence_steps())
                    print("get_output_csv",swsm.get_current_subset_sequence().get_output_csv()) 
                    print("get_output_dfc_df_title",swsm.get_current_subset_sequence().get_output_dfc_df_title()) 
            
            
            swsw.display_df_subset_setup()
            
            
            if(DEBUG_SUBSET) :
                print("DISPLAY_GET_SUBSET",parms)
                print("DISPLAY_GET_SUBSET : clear data")
                print(swsm.get_current_subset_sequence())
                print(swsm.get_current_subset_df())
                print(swsm.get_current_subset_step())
                print("new_sequence")
                swsm.dump_current_step()
                swsm.dump_current_sequence()
            
        
        elif(optionId == swsm.PROCESS_GET_SUBSET) :
            
            current_step        =   swsm.get_current_subset_step()
            
            if(not (current_step is None)) :
                current_sequence    =   swsm.get_current_subset_sequence()
                current_sequence.add_step_to_sequence_steps(current_step) 
            
            fparms              =   get_parms_for_input(parms,swsw.get_subset_input_idList)
            
            if(len(fparms) > 0) :
        
                df_title            =   fparms[0]
                df                  =   cfg.get_dfc_dataframe_df(df_title)
                col_names           =   fparms[1]
                col_action          =   fparms[3]
        
            new_subset_df           =   drop_add_cols(col_names,col_action,df)
            new_subset_df_title     =   df_title
    
            new_subset_step     =   swsm.dfc_subset_step(new_subset_df_title,col_names,col_action)
            swsm.set_current_subset_step(new_subset_step)
            swsm.set_current_subset_df(new_subset_df)
            
            swsw.display_df_criteria(new_subset_df_title,new_subset_df) 
            
            if(DEBUG_SUBSET) :
                print("\nPROCESS_GET_SUBSET\n  ",parms,"\n  ",fparms)
                swsm.dump_current_step()
                swsm.dump_current_sequence()

            
        elif(optionId == swsm.DISPLAY_SAVED_SUBSET) :
            
            swsw.display_saved_subset_sequences() 

            if(DEBUG_SUBSET) :
                print("\nDISPLAY_SAVED_SUBSET",parms)
            
            
        elif(optionId == swsm.PROCESS_RUN_CRITERIA) :
            
            opstat  =   opStatus()
            
            
            fparms  =   get_parms_for_input(parms,swsw.get_subset_criteria_input_idList)
            
            subset_title    =   fparms[0]
            
            if(len(subset_title) == 0) :
                
                current_sequence    =   swsm.get_current_subset_sequence()
                total_steps         =   current_sequence.get_total_sequence_steps()
                current_step        =   swsm.get_current_subset_step()
                subset_title        =   current_step.get_input_subset_df_title() + "_subset_" + str(total_steps+1)
                
            criteria        =   fparms[2]
            
            if(len(criteria) > 0) :
                
                try :
                    
                    clock   =   RunningClock()
                    clock.start()
                    
                    final_criteria  =   (swsm.starting_criteria_preamble + criteria + swsm.starting_criteria_postamble)
        
                    exec(final_criteria)
                    
                    current_step    =   swsm.get_current_subset_step()
                    current_step.set_criteria(criteria)
                    current_step.set_output_subset_df_title(subset_title)
                    
                    clock.stop()
                    
                except Exception as e:
                    opstat.store_exception("Error running df_criteria " + criteria,e)
                    
                    clock.stop()
            
            
            if(opstat.get_status()) :
                swsw.display_process_subset() 
            else :
                display_exception(opstat)

            if(DEBUG_SUBSET) :
                print("PROCESS_RUN_CRITERIA : End")
                swsm.dump_current_step()
                swsm.dump_current_sequence()
            
        elif(optionId ==  swsm.DISPLAY_SAVE_SUBSET) :
            
            fparms              =   get_parms_for_input(parms,swsw.get_subset_run_input_idList)
            
            current_sequence    =   swsm.get_current_subset_sequence()
            current_step        =   swsm.get_current_subset_step()
            current_sequence.add_step_to_sequence_steps(current_step)
    
            if(len(fparms) > 0) :
        
                df_title            =   fparms[0]
                df                  =   swsm.get_current_subset_df()
                col_names           =   fparms[1]
                col_action          =   fparms[3]
 
            new_subset_df   =   drop_add_cols(col_names,col_action,df)
            
            swsw.display_save_subset(df_title,new_subset_df) 
            
            if(DEBUG_SUBSET) :
                print("DISPLAY_SAVE_SUBSET",parms,fparms)
                swsm.dump_current_step()
                swsm.dump_current_sequence()
            
 
            
        elif(optionId ==  swsm.DISPLAY_SAVE_AND_GET_SUBSET) :
            
            fparms              =   get_parms_for_input(parms,swsw.get_subset_run_input_idList)
            
            current_sequence    =   swsm.get_current_subset_sequence()
            current_step        =   swsm.get_current_subset_step()
            current_sequence.add_step_to_sequence_steps(current_step)
    
            if(len(fparms) > 0) :
        
                df_title            =   fparms[0]
                col_names           =   fparms[1]
                col_action          =   fparms[3]
        
            new_subset_step     =   swsm.dfc_subset_step(df_title,col_names,col_action)
            df                  =   swsm.get_current_subset_df()

            new_subset_df       =   drop_add_cols(col_names,col_action,df)
            swsm.set_current_subset_df(new_subset_df)
            swsm.set_current_subset_step(new_subset_step)
            
            swsw.display_df_criteria(df_title,new_subset_df)

            if(DEBUG_SUBSET) :
                print("PROCESS_SAVE_AND_GET_SUBSET",parms,fparms)
                swsm.dump_current_step()
                swsm.dump_current_sequence()

            
        elif(optionId ==  swsm.PROCESS_SAVE_SUBSET) :
            
            save_subset_run(parms,0)
            
        elif(optionId ==  swsm.PROCESS_SUBSET_SEQUENCE) :
            
            opstat  =   opStatus()
                        
            fparms      =   get_parms_for_input(parms,swsw.get_subset_sequences_input_idList)
            
            sequence    =   fparms[0]
            run_option  =   fparms[1]
            
            saved_sequence  =   swsm.get_subset_sequence(sequence)
            first_step      =   saved_sequence.get_sequence_step(0)
            
            df_title        =   first_step.get_input_subset_df_title()
            df              =   cfg.get_dfc_dataframe_df(df_title)
            
            if(df is None) :
                
                swsw.get_dfsubset_main_taskbar()
                cfg.display_data_select_df(cfg.SWDFSubsetUtility_ID)
                
                opstat.set_status(False)
                opstat.set_errorMsg("subset sequence starting df '" + df_title + "' is not currently loaded in dfc")
                display_exception(opstat)
            
            else :
                
                if(run_option == "Auto Run") :
                    
                    total_steps     =   saved_sequence.get_total_sequence_steps()
                    swsm.set_current_subset_df(df)
                    
                    for i in range(total_steps) :
                        
                        current_step        =   saved_sequence.get_sequence_step(i)
                        current_columns     =   current_step.get_col_names_list()
                        columns_action      =   current_step.get_keep_drop_flag()
                        criteria            =   current_step.get_criteria()
                        output_df_title     =   current_step.get_output_subset_df_title()
                        
                        current_df          =   swsm.get_current_subset_df()   
                        
                        if(len(current_columns) > 0) :
                            
                            colnames        =   list(current_df.columns)
                            drop_columns    =   []
                            
                            for i in range(len(colnames)) :
                                
                                if(columns_action == "Keep") :
                                    if(not (colnames[i] in current_columns)) :
                                        drop_columns.append(colnames[i])
                                else :
                                    if(colnames[i] in current_columns) :
                                        drop_columns.append(colnames[i])  
                                        
                            if(len(drop_columns) > 0 ) :
                                
                                try :
                                    current_df.drop(drop_columns, axis=1, inplace=True)   
                                except :
                                    opstat.set_status(False)
                                    opstat.set_errorMsg("Unable to drop columns from subset dataframe")
                        
                        swsm.set_current_subset_df(current_df)
                        
                        try :
                            
                            current_df         =     swsm.get_current_subset_df()
                            exec(criteria + swsm.starting_criteria_postamble)
                            current_df         =     swsm.get_current_subset_df()
                            
                        except Exception as e:
                            opstat.store_exception("Error running subset sequence '" + sequence + "'",e)

                    swsw.display_save_subset(output_df_title,swsm.get_current_subset_df(),True)    
                    
                else :
                
                    total_steps     =   saved_sequence.get_total_sequence_steps()
                    swsm.set_current_subset_df(df)
                    
                    current_step        =   saved_sequence.get_sequence_step(0)
                    current_df_title    =   current_step.get_input_subset_df_title()
                    current_columns     =   current_step.get_col_names_list()
                    columns_action      =   current_step.get_keep_drop_flag()
                    
                    swsw.display_manual_df_subset_setup(saved_sequence.get_sequence_title(),current_df_title,current_columns,columns_action,0)
                    
                    swsm.set_current_subset_step_id(0)
                    
                    swsm.set_current_subset_sequence(saved_sequence)

                     
        elif(optionId ==  swsm.PROCESS_GET_NEXT_SUBSET) :
            
            fparms      =   get_parms_for_input(parms,swsw.get_manual_input_idList) 
            
            collist     =   fparms[1]
            collist     =   collist.lstrip("[")
            collist     =   collist.rstrip("]")
            collist     =   collist.split(",")
            
            keep_drop   =   fparms[3]
            
            saved_sequence      =   swsm.get_current_subset_sequence()
            
            total_steps     =   saved_sequence.get_total_sequence_steps()
            current_step_id =   swsm.get_current_subset_step_id()
            
            if(current_step_id < total_steps) :
                current_step        =   saved_sequence.get_sequence_step(swsm.get_current_subset_step_id())
            else :
                swsm.set_current_subset_step_col_names_list(collist)
                swsm.set_current_subset_keep_drop_flag(keep_drop)
                current_step        =   swsm.get_current_subset_step()
                
            swsm.dump_current_step() 
            
            swsm.set_current_subset_step(current_step)
            
            current_df_title    =   current_step.get_input_subset_df_title()

            current_df          =   swsm.get_current_subset_df()
            current_columns     =   current_step.get_col_names_list()
            columns_action      =   current_step.get_keep_drop_flag()
            criteria            =   current_step.get_criteria()
            output_df_title     =   current_step.get_output_subset_df_title()
            
            if(len(current_columns) > 0) :
                            
                colnames        =   list(current_df.columns)
                drop_columns    =   []
                            
                for i in range(len(colnames)) :
                                
                    if(columns_action == "Keep") :
                        if(not (colnames[i] in current_columns)) :
                            drop_columns.append(colnames[i])
                    else :
                        if(colnames[i] in current_columns) :
                            drop_columns.append(colnames[i])  
                                        
                if(len(drop_columns) > 0 ) :
                                
                    try :
                        current_df.drop(drop_columns, axis=1, inplace=True)   
                    except :
                        opstat.set_status(False)
                        opstat.set_errorMsg("Unable to drop columns from subset dataframe")
                        
            
            swsw.display_next_criteria(current_df_title,current_df,criteria,output_df_title)
            
            
        elif(optionId ==  swsm.PROCESS_NEXT_CRITERIA) :
            
            opstat  =   opStatus()
            
            fparms      =   get_parms_for_input(parms,swsw.get_next_criteria_input_idList)  
            
            output_df_title     =   fparms[0]
            criteria            =   fparms[2]
            
            current_sequence    =   swsm.get_subset_sequence(sequence)
            sequence_title      =   current_sequence.get_sequence_title()
            
            try :
                            
                current_df         =     swsm.get_current_subset_df()
                exec(criteria + swsm.starting_criteria_postamble)
                current_df         =     swsm.get_current_subset_df()
                            
            except Exception as e:
                
                opstat.store_exception("Error running subset sequence '" + sequence_title + "'",e)
                
                current_df_title    =   current_step.get_input_subset_df_title()
                current_df          =   swsm.get_current_subset_df()
                criteria            =   current_step.get_criteria()
                output_df_title     =   current_step.get_output_subset_df_title()
                
                swsw.display_next_criteria(current_df_title,current_df,criteria,output_df_title)                
                
                display_exception(opstat)
                
            if(opstat.get_status()) :
                
                
                swsm.set_current_subset_df(current_df)
                swsm.set_current_subset_step_id(swsm.get_current_subset_step_id() + 1)
                
                if(swsm.get_current_subset_step_id() >= swsm.get_current_subset_sequence().get_total_sequence_steps()) :
                    
                    swsw.display_sequence_save_subset(output_df_title,swsm.get_current_subset_df()) 

                else :
                    
                    current_step        =   swsm.get_current_subset_sequence().get_sequence_step(swsm.get_current_subset_step_id())
                    current_df_title    =   current_step.get_input_subset_df_title()
                    current_columns     =   current_step.get_col_names_list()
                    columns_action      =   current_step.get_keep_drop_flag()
                    
                    swsw.display_manual_df_subset_setup(swsm.get_current_subset_sequence().get_sequence_title(),current_df_title,current_columns,columns_action,swsm.get_current_subset_step_id())
                    
        elif(optionId ==  swsm.DISPLAY_NEW_STEP) :  
                
            current_sequence    =   swsm.get_current_subset_sequence()
            sequence_title      =   current_sequence.get_sequence_title()
            
            current_step        =   swsm.get_current_subset_step()
            df_title            =   current_step.get_output_subset_df_title()
            current_df          =   swsm.get_current_subset_df()
            current_columns     =   []
            current_action      =   "Keep"
            criteria            =   swsm.starting_criteria
            output_df_title     =   ""
            
            current_step        =   swsm.dfc_subset_step(df_title,current_columns,current_action,criteria,output_df_title)
            swsm.set_current_subset_step(current_step)
            
            swsw.display_manual_df_subset_setup(sequence_title,df_title,current_columns,current_action,swsm.get_current_subset_step_id())
            #swsw.display_next_criteria(df_title,current_df,criteria,output_df_title)
                
        elif(optionId ==  swsm.PROCESS_SAVE_SAVED_SUBSET ) :   

            save_subset_run(parms,1)
        
        elif(optionId ==  swsm.DISPLAY_GET_REMOTE_SUBSET) :  
            
            chapterid   =   parms[0]
            
            new_config_df   =   None
            
            if(chapterid == cfg.DataInspection_ID)      :   new_config_df  =   cfg.get_config_value(cfg.CURRENT_INSPECTION_DF)
            elif(chapterid == cfg.DataCleansing_ID)     :   new_config_df  =   cfg.get_config_value(cfg.CURRENT_CLEANSE_DF)
            elif(chapterid == cfg.DataTransform_ID)     :   new_config_df  =   cfg.get_config_value(cfg.CURRENT_TRANSFORM_DF)
            elif(chapterid == cfg.DataExport_ID)        :   new_config_df  =   cfg.get_config_value(cfg.CURRENT_EXPORT_DF)
            elif(chapterid == cfg.DataImport_ID)        :   new_config_df  =   cfg.get_config_value(cfg.CURRENT_IMPORT_DF)
            elif(chapterid == cfg.SWGeocodeUtility_ID)  :   new_config_df  =   cfg.get_config_value(cfg.CURRENT_GEOCODE_DF)
            elif(chapterid == cfg.SWDFSubsetUtility_ID) :   new_config_df  =   cfg.get_config_value(cfg.CURRENT_SUBSET_DF)
            
            cfg.set_config_value(cfg.CURRENT_SUBSET_DF,new_config_df)
            
            swsm.clear_current_subset_data()
            swsw.display_df_subset_setup()
            
    else :
        
        swsw.get_dfsubset_main_taskbar()
        
        cfg.drop_config_value(cfg.CURRENT_SUBSET_DF)
        clear_sw_utility_dfsubsetdata()
        
        cfg.display_data_select_df(cfg.SWDFSubsetUtility_ID)
            
        if(not(optionId == swsm.DISPLAY_MAIN)) :
            cfg.display_no_dfs(cfg.SWDFSubsetUtility_ID)
예제 #14
0
def get_df_list_html(title):
    """
    * -------------------------------------------------------------------------- 
    * function : get the html for list of dfc dataframes
    * 
    * parms :
    *  title - dfc dataframe title to preselect; None selects the first
    *          loaded dataframe
    *
    * returns : html string for the dataframe-manager input form
    * --------------------------------------------------------
    """

    # blank form values used whenever no matching dataframe is available
    fparms = ["", "", "", ""]

    if (title is None):

        if (cfg.is_a_dfc_dataframe_loaded()):
            df_titles = cfg.get_dfc_dataframes_titles_list()
            fparms = _get_df_form_parms(df_titles[0])

    else:

        if (cfg.is_a_dfc_dataframe_loaded()):
            # verify the titled dataframe actually exists before describing it
            if (cfg.get_dfc_dataframe(title) is not None):
                fparms = _get_df_form_parms(title)

    # title and notes are editable; row/column counts are read-only
    parmsProtect = [False, True, True, False]

    cfg.set_config_value(dfmgr_input_id + "Parms", fparms)
    cfg.set_config_value(dfmgr_input_id + "ParmsProtect", parmsProtect)

    dfmanager_input_form = InputForm(dfmgr_input_id, dfmgr_input_idList,
                                     dfmgr_input_labelList,
                                     dfmgr_input_typeList,
                                     dfmgr_input_placeholderList,
                                     dfmgr_input_jsList, dfmgr_input_reqList)

    selectDicts = []
    df_titles = cfg.get_dfc_dataframes_titles_list()

    if (not (cfg.is_a_dfc_dataframe_loaded())):
        # no dataframes loaded : show a single blank entry in the selector
        dfs = {"default": " ", "list": [" "]}
    else:
        dfs = {
            "default": str(fparms[0]),
            "list": df_titles,
            "callback": "select_new_df"
        }
    selectDicts.append(dfs)

    get_select_defaults(dfmanager_input_form, dfmgr_input_id,
                        dfmgr_input_idList, dfmgr_input_typeList, selectDicts)

    dfmanager_input_form.set_shortForm(True)
    dfmanager_input_form.set_gridwidth(480)
    dfmanager_input_form.set_custombwidth(90)
    dfmanager_input_form.set_fullparms(True)

    return (dfmanager_input_form.get_html())


def _get_df_form_parms(df_title):
    """
    * -------------------------------------------------------------------------- 
    * function : build the input-form values for a single dfc dataframe
    * 
    * parms :
    *  df_title - dfc dataframe title (must name a loaded dataframe)
    *
    * returns : [title, row count, column count, notes] as strings
    * --------------------------------------------------------
    """

    # fetch the dataframe once instead of once per derived value
    df = cfg.get_dfc_dataframe_df(df_title)

    return [
        df_title,
        str(len(df)),
        str(len(df.columns)),
        cfg.get_dfc_dataframe_notes(df_title)
    ]