def export_sql_table(parms, display=True):
    """
    * -------------------------------------------------------------------------- 
    * function : export a pandas dataframe into a sql table
    * 
    * parms :
    *   parms       -   sql parms
    *   display     -   display flag
    *
    * returns : N/A
    * --------------------------------------------------------
    """

    opstat = opStatus()

    dew.display_export_main_taskbar()

    print("export_sql_table", parms)

    save_data_export_start()
    clock = RunningClock()
    clock.start()

    dbid = cfg.get_config_value(cfg.CURRENT_DB_ID_KEY)
    dbcondict = set_dbcon_dict(dbid, get_stored_con_Parms(dbid))

    sqltableparms = dew.get_sqltable_export_inputs(parms)

    (export_notes,
     opstat) = export_pandas_sqltable(sqltableparms, dbcondict,
                                      dew.pandas_export_sqltable_id)

    clock.stop()

    if (opstat.get_status()):

        for i in range(len(sqltableparms)):
            sqltableparms[i] = get_string_value(sqltableparms[i])

        sqltableparms = sqltableparms[0:8]
        sqltableparms.append(export_notes)
        sqltablelabels = dew.pandas_export_sqltable_labelList[0:8]
        sqltablelabels.append("DB Connector String")

        display_data_export_parms("Pandas SQL Table Export Parms",
                                  sqltablelabels, sqltableparms,
                                  cfg.DataExport_ID, sqltableparms[1], True)

    else:
        display_exception(opstat)
def export_test_sql_db_connector(driverid, sqlinputparms):
    """
    * -------------------------------------------------------------------------- 
    * function : test the sql db connectors for the current database
    * 
    * parms :
    *   driverid        -   driver id
    *   sqlinputparms   -   connection string
    *
    * returns : N/A
    * --------------------------------------------------------
    """

    opstat = opStatus()

    try:

        from dfcleanser.common.db_utils import test_db_connector, SQL_EXPORT, NATIVE, SQLALCHEMY

        # test both the native and the sqlalchemy connectors
        opstat = test_db_connector(cfg.get_config_value(cfg.CURRENT_DB_ID_KEY),
                                   NATIVE, sqlinputparms, SQL_EXPORT)
        opstat = test_db_connector(cfg.get_config_value(cfg.CURRENT_DB_ID_KEY),
                                   SQLALCHEMY, sqlinputparms, SQL_EXPORT)

    except Exception as e:
        opstat.store_exception("DB Connection failed ", e)
        display_exception(opstat)
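# --------------------------------------------------------------------------
# usage sketch (illustrative only) : exercising the connector test directly
# for a sqlite database.  The parms layout (file path followed by library
# name) and the driverid value are assumptions based on the sqlite defaults
# used later in this listing, not a documented call signature.
# --------------------------------------------------------------------------
# export_test_sql_db_connector(0, ["./example.db", "sqlite3"])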
def save_column_names_row(parms,display=True):
    """
    * -------------------------------------------------------------------------- 
    * function : save column names row to a file
    * 
    * parms :
    *   parms     -   transform parms
    *   display   -   display flag
    *
    * returns : 
    *  [opstat, filename]
    * --------------------------------------------------------
    """

    opstat = opStatus() 
    
    try :
    
        fparms      =   get_parms_for_input(parms,dftw.df_save_row_transform_input_idList)
        filename    =   fparms[0]
            
        if(len(filename) == 0) :
            filename = "./" + cfg.get_config_value(cfg.CURRENT_IMPORTED_DATA_SOURCE_KEY)
            #filename = filename.replace(".","_")
            filename = filename + "_column_names.json"

        # see if save col names row 
        if(len(filename) > 0) :
                        
            colids = cfg.get_current_chapter_df(cfg.CURRENT_TRANSFORM_DF).columns.tolist()

            # save the column names row as a json file
            # (the with statement closes the file on exit)
            with open(filename, 'w') as colid_file :
                json.dump(colids,colid_file)
                
            if(display) :
                    
                #make scriptable
                add_to_script(["# save column names row",
                               "from dfcleanser.data_transform.data_transform_dataframe_control save_column_names_row",
                               "save_column_names_row(" + json.dumps(parms) + ",False)"],opstat)
                
    except Exception as e:
        opstat.store_exception("Unable to save column names file to : " + filename,e)

    return([opstat, filename])
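# --------------------------------------------------------------------------
# usage sketch (illustrative only) : reading back a column names file written
# by save_column_names_row.  Only the file layout (a flat json list of column
# names) is taken from the code above; the filename is hypothetical.
# --------------------------------------------------------------------------
def load_column_names_row(filename="./my_source_column_names.json"):

    import json

    # the file holds a plain json list of column names
    with open(filename, 'r') as colid_file:
        colids = json.load(colid_file)

    return colids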
def isEULA_read():

    if (cfg.get_config_value(cfg.EULA_FLAG_KEY) == 'true'):
        return (True)
    else:
        return (False)
def display_df_subset_setup():
    """
    * -------------------------------------------------------------------------- 
    * function : display current df subset form
    * 
    * parms : N/A
    *
    * returns : N/A
    * --------------------------------------------------------
    """
    df_title = cfg.get_config_value(cfg.CURRENT_SUBSET_DF)
    df = cfg.get_dfc_dataframe_df(df_title)

    col_stats_table = get_column_stats_table(df_title, df)

    from dfcleanser.common.html_widgets import InputForm
    subset_input_form = InputForm(get_subset_input_id, get_subset_input_idList,
                                  get_subset_input_labelList,
                                  get_subset_input_typeList,
                                  get_subset_input_placeholderList,
                                  get_subset_input_jsList,
                                  get_subset_input_reqList)

    selectDicts = []

    dataframes = cfg.get_dfc_dataframes_select_list(cfg.SWDFSubsetUtility_ID)
    selectDicts.append(dataframes)

    current_df = cfg.get_current_chapter_df(cfg.SWDFSubsetUtility_ID)
    colnames = current_df.columns.tolist()
    cols_name_list = [" "]
    for i in range(len(colnames)):
        cols_name_list.append(colnames[i])

    cnames = {
        "default": cols_name_list[0],
        "list": cols_name_list,
        "callback": "change_subset_cols"
    }
    selectDicts.append(cnames)

    subssel = {"default": "Keep", "list": ["Keep", "Drop"]}
    selectDicts.append(subssel)

    get_select_defaults(subset_input_form, get_subset_input_form[0],
                        get_subset_input_form[1], get_subset_input_form[3],
                        selectDicts)

    subset_input_form.set_shortForm(False)
    subset_input_form.set_gridwidth(680)
    subset_input_form.set_custombwidth(140)
    subset_input_form.set_fullparms(True)

    get_subset_input_html = subset_input_form.get_html()

    get_subset_heading_html = "<div>Get Dataframe Subset</div><br></br>"

    gridclasses = ["dfc-top", "dfcleanser-common-grid-header", "dfc-bottom"]
    gridhtmls = [
        col_stats_table, get_subset_heading_html, get_subset_input_html
    ]

    print("\n")
    display_generic_grid("sw-utils-subset-wrapper", gridclasses, gridhtmls)
def export_pandas_sqltable(sqltableparms, dbcondict, exportid, display=True):
    """
    * -------------------------------------------------------------------------- 
    * function : export pandas dataframe into sql table 
    * 
    * parms :
    *   sqltableparms    -   export parms
    *   dbcondict        -   db connector dict
    *   exportId         -   export id
    *   display          -   display flag
    *
    * returns : (export_notes, opstat)
    * --------------------------------------------------------
    """

    opstat = opStatus()

    import dfcleanser.common.db_utils as dbu
    dbcon = dbu.dbConnector()

    from dfcleanser.common.db_utils import grab_connection_parms
    if (dbcondict == None):
        parmslist = get_stored_con_Parms(
            cfg.get_config_value(cfg.CURRENT_DB_ID_KEY))
        dbcondict = set_dbcon_dict(cfg.get_config_value(cfg.CURRENT_DB_ID_KEY),
                                   parmslist)

    dbconparms = grab_connection_parms(dbcondict)

    dbcon.set_ConnectionParms(dbconparms)

    dbconnector = dbcon.connect_to_db(dbu.SQLALCHEMY, opstat)

    if (opstat.get_status()):

        if (len(sqltableparms) == 0):
            opstat.set_status(False)
            opstat.set_errorMsg("No Export parameters defined")

        else:

            if (sqltableparms[0] == ""):
                opstat.set_status(False)
                opstat.set_errorMsg("No dataframe selcted to export")

            else:

                if (sqltableparms[1] == ""):
                    opstat.set_status(False)
                    opstat.set_errorMsg("No tabl;e selcted to export to")

                else:

                    df = cfg.get_dfc_dataframe_df(sqltableparms[0])

                    labellist = dew.pandas_export_sqltable_labelList

                    try:

                        sqlkeys = [
                            labellist[2], labellist[3], labellist[4],
                            labellist[5], labellist[6], labellist[7]
                        ]
                        sqlvals = [
                            sqltableparms[2], sqltableparms[3],
                            sqltableparms[4], sqltableparms[5],
                            sqltableparms[6], sqltableparms[7]
                        ]
                        sqltypes = [
                            STRING_PARM, STRING_PARM, BOOLEAN_PARM,
                            STRING_PARM, INT_PARM, DICT_PARM
                        ]

                        sqlparms = {}
                        sqladdlparms = {}

                    except Exception as e:
                        opstat.set_status(False)
                        opstat.store_exception("Error parsing Export parms", e)

                    if (opstat.get_status()):

                        try:

                            sqlparms = get_function_parms(
                                sqlkeys, sqlvals, sqltypes)
                            if (not (sqltableparms[8] == "")):
                                sqladdlparms = json.loads(sqltableparms[8])

                            if (len(sqladdlparms) > 0):
                                addlparmskeys = list(sqladdlparms.keys())
                                for i in range(len(addlparmskeys)):
                                    sqlparms.update({
                                        addlparmskeys[i]:
                                        sqladdlparms.get(addlparmskeys[i])
                                    })

                        except Exception as e:
                            opstat.set_status(False)
                            opstat.store_exception(
                                "Error parsing Export additional parms", e)

                        if (opstat.get_status()):

                            try:

                                df.to_sql(sqltableparms[1], dbconnector,
                                          **sqlparms)

                            except Exception as e:
                                opstat.store_exception(
                                    "Unable to export to sql table", e)

    export_notes = ""

    if (opstat.get_status()):

        if (display):
            #make scriptable
            add_to_script([
                "# Export SQL Table ",
                "from dfcleanser.data_export.data_export_control export export_pandas_sqltable",
                "export_pandas_sqltable(" + json.dumps(sqltableparms) + "," +
                json.dumps(dbcondict) + "," + str(exportid) + ",False)"
            ], opstat)

        export_notes = dbu.get_SQLAlchemy_connector_string(dbconparms)

        if (len(sqltableparms) > 0):
            cfg.set_config_value(exportid + "Parms", sqltableparms)
            cfg.set_config_value(cfg.CURRENT_EXPORTED_FILE_NAME_KEY,
                                 sqltableparms[0], True)

    return (export_notes, opstat)
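# --------------------------------------------------------------------------
# sketch (illustrative only) : export_pandas_sqltable above ultimately calls
# DataFrame.to_sql against a sqlalchemy connection.  The standalone snippet
# below shows the equivalent plain pandas / sqlalchemy call; the connection
# string, table name and keyword values are hypothetical, and the keyword
# names mirror the sqlkeys built from the export form labels.
# --------------------------------------------------------------------------
def export_df_to_sqltable_sketch(df):

    from sqlalchemy import create_engine

    # hypothetical sqlite connection string; dfcleanser builds the real one
    # from the stored db connector parms via get_SQLAlchemy_connector_string
    engine = create_engine("sqlite:///example_export.db")

    # if_exists / index / chunksize are the same options exposed on the
    # pandas export sql table form
    df.to_sql("example_table", engine,
              if_exists="replace", index=False, chunksize=1000)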
def get_data_export_start():
    return (cfg.get_config_value(cfg.CURRENT_EXPORT_START_TIME))
def set_df_index(parms,display=True):
    """
    * -------------------------------------------------------------------------- 
    * function : set df indices
    * 
    * parms :
    *   parms     -   transform parms
    *   display   -   display flag
    *
    * returns : 
    *  opstat
    * --------------------------------------------------------
    """

    opstat = opStatus()
    
    fparms      =   get_parms_for_input(parms,dftw.df_set_index_transform_input_idList)

    colnames    =   fparms[0]
    
    if(len(colnames) == 0) :
        opstat.set_status(False)
        opstat.set_errorMsg("column names list is empty")
        
    else :
        
        colnames    =   colnames.lstrip("[")
        colnames    =   colnames.rstrip("]")
        colnames    =   colnames.split(",")
        
        if(fparms[2] == "True") :
            drop = True
        else :
            drop = False
            
        if(opstat.get_status()) :
                
            if(fparms[3] == "True") :
                verify = True
            else :
                verify = False
                
    if(opstat.get_status()) :

        try : 
            
            df = cfg.get_current_chapter_df(cfg.DataTransform_ID)
            
            df.set_index(colnames,drop=drop,append=True,inplace=True,verify_integrity=verify)
            
            cfg.set_dfc_dataframe_df(cfg.get_config_value(cfg.CURRENT_TRANSFORM_DF),df)
            
            if(display) :
                #make scriptable
                add_to_script(["# set df index",
                               "from dfcleanser.data_transform.data_transform_dataframe_control set_df_index",
                               "set_df_index(" + json.dumps(parms[1]) + ",False)"],opstat)

        
        except Exception as e: 
            opstat.store_exception("Unable to set index of column(s) : " + str(colnames),e)

    return(opstat)
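# --------------------------------------------------------------------------
# sketch (illustrative only) : set_df_index above reduces to a pandas
# set_index call with the drop / append / verify_integrity flags parsed from
# the form inputs.  The dataframe and column name below are hypothetical.
# --------------------------------------------------------------------------
def set_df_index_sketch():

    import pandas as pd

    df = pd.DataFrame({"id": [1, 2, 3], "name": ["a", "b", "c"]})

    # same call made by set_df_index (append=True keeps any existing index)
    df.set_index(["id"], drop=True, append=True,
                 inplace=True, verify_integrity=False)

    return df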
def display_dfsubset_utility(optionId,parms=None) :
    """
    * ---------------------------------------------------------
    * function : main subset utility control
    * 
    * parms :
    *  optionId     - function to run
    *  parms        - parms to run function
    *
    * returns : 
    *  NA
    * --------------------------------------------------------
    """
    
    if(cfg.is_a_dfc_dataframe_loaded()) :
        
        from IPython.display import clear_output
        clear_output()
        
        from dfcleanser.common.html_widgets import define_inputs, are_owner_inputs_defined
        if(not (are_owner_inputs_defined(cfg.SWDFSubsetUtility_ID)) ) :
            define_inputs(cfg.SWDFSubsetUtility_ID,swsw.SWUtility_subset_inputs)
    
        if(optionId == swsm.DISPLAY_MAIN) :
            
            swsw.get_dfsubset_main_taskbar()
            clear_sw_utility_dfsubsetdata()
            
            cfg.display_data_select_df(cfg.SWDFSubsetUtility_ID)
            swsm.clear_current_subset_data() 
            
        elif(optionId == swsm.DISPLAY_GET_SUBSET) :
            
            swsm.clear_current_subset_data()
            
            if(DEBUG_SUBSET) :
                swsm.set_current_subset_sequence(swsm.dfc_subset_sequence())
                print("\ncurrent_subset_sequence\n")
                print("input_df_title",swsm.get_current_subset_sequence().get_input_df_title()) 
                print("get_sequence_title",swsm.get_current_subset_sequence().get_sequence_title()) 
                print("get_sequence_steps",swsm.get_current_subset_sequence().get_sequence_steps()) 
                if(not (swsm.get_current_subset_sequence().get_sequence_steps() is None)) :
                    print("get_total_sequence_steps",swsm.get_current_subset_sequence().get_total_sequence_steps())
                    print("get_output_csv",swsm.get_current_subset_sequence().get_output_csv()) 
                    print("get_output_dfc_df_title",swsm.get_current_subset_sequence().get_output_dfc_df_title()) 
            
            
            swsw.display_df_subset_setup()
            
            
            if(DEBUG_SUBSET) :
                print("DISPLAY_GET_SUBSET",parms)
                print("DISPLAY_GET_SUBSET : clear data")
                print(swsm.get_current_subset_sequence())
                print(swsm.get_current_subset_df())
                print(swsm.get_current_subset_step())
                print("new_sequence")
                swsm.dump_current_step()
                swsm.dump_current_sequence()
            
        
        elif(optionId == swsm.PROCESS_GET_SUBSET) :
            
            current_step        =   swsm.get_current_subset_step()
            
            if(not (current_step is None)) :
                current_sequence    =   swsm.get_current_subset_sequence()
                current_sequence.add_step_to_sequence_steps(current_step) 
            
            fparms              =   get_parms_for_input(parms,swsw.get_subset_input_idList)
            
            if(len(fparms) > 0) :
        
                df_title            =   fparms[0]
                df                  =   cfg.get_dfc_dataframe_df(df_title)
                col_names           =   fparms[1]
                col_action          =   fparms[3]
        
            new_subset_df           =   drop_add_cols(col_names,col_action,df)
            new_subset_df_title     =   df_title
    
            new_subset_step     =   swsm.dfc_subset_step(new_subset_df_title,col_names,col_action)
            swsm.set_current_subset_step(new_subset_step)
            swsm.set_current_subset_df(new_subset_df)
            
            swsw.display_df_criteria(new_subset_df_title,new_subset_df) 
            
            if(DEBUG_SUBSET) :
                print("\nPROCESS_GET_SUBSET\n  ",parms,"\n  ",fparms)
                swsm.dump_current_step()
                swsm.dump_current_sequence()

            
        elif(optionId == swsm.DISPLAY_SAVED_SUBSET) :
            
            swsw.display_saved_subset_sequences() 

            if(DEBUG_SUBSET) :
                print("\nDISPLAY_SAVED_SUBSET",parms)
            
            
        elif(optionId == swsm.PROCESS_RUN_CRITERIA) :
            
            opstat  =   opStatus()
            
            
            fparms  =   get_parms_for_input(parms,swsw.get_subset_criteria_input_idList)
            
            subset_title    =   fparms[0]
            
            if(len(subset_title) == 0) :
                
                current_sequence    =   swsm.get_current_subset_sequence()
                total_steps         =   current_sequence.get_total_sequence_steps()
                current_step        =   swsm.get_current_subset_step()
                subset_title        =   current_step.get_input_subset_df_title() + "_subset_" + str(total_steps+1)
                
            criteria        =   fparms[2]
            
            if(len(criteria) > 0) :
                
                try :
                    
                    clock   =   RunningClock()
                    clock.start()
                    
                    final_criteria  =   (swsm.starting_criteria_preamble + criteria + swsm.starting_criteria_postamble)
        
                    exec(final_criteria)
                    
                    current_step    =   swsm.get_current_subset_step()
                    current_step.set_criteria(criteria)
                    current_step.set_output_subset_df_title(subset_title)
                    
                    clock.stop()
                    
                except Exception as e:
                    opstat.store_exception("Error running df_criteria " + criteria,e)
                    
                    clock.stop()
            
            
            if(opstat.get_status()) :
                swsw.display_process_subset() 
            else :
                display_exception(opstat)

            if(DEBUG_SUBSET) :
                print("PROCESS_RUN_CRITERIA : End")
                swsm.dump_current_step()
                swsm.dump_current_sequence()
            
        elif(optionId ==  swsm.DISPLAY_SAVE_SUBSET) :
            
            fparms              =   get_parms_for_input(parms,swsw.get_subset_run_input_idList)
            
            current_sequence    =   swsm.get_current_subset_sequence()
            current_step        =   swsm.get_current_subset_step()
            current_sequence.add_step_to_sequence_steps(current_step)
    
            if(len(fparms) > 0) :
        
                df_title            =   fparms[0]
                df                  =   swsm.get_current_subset_df()
                col_names           =   fparms[1]
                col_action          =   fparms[3]
 
            new_subset_df   =   drop_add_cols(col_names,col_action,df)
            
            swsw.display_save_subset(df_title,new_subset_df) 
            
            if(DEBUG_SUBSET) :
                print("DISPLAY_SAVE_SUBSET",parms,fparms)
                swsm.dump_current_step()
                swsm.dump_current_sequence()
            
 
            
        elif(optionId ==  swsm.DISPLAY_SAVE_AND_GET_SUBSET) :
            
            fparms              =   get_parms_for_input(parms,swsw.get_subset_run_input_idList)
            
            current_sequence    =   swsm.get_current_subset_sequence()
            current_step        =   swsm.get_current_subset_step()
            current_sequence.add_step_to_sequence_steps(current_step)
    
            if(len(fparms) > 0) :
        
                df_title            =   fparms[0]
                col_names           =   fparms[1]
                col_action          =   fparms[3]
        
            new_subset_step     =   swsm.dfc_subset_step(df_title,col_names,col_action)
            df                  =   swsm.get_current_subset_df()

            new_subset_df       =   drop_add_cols(col_names,col_action,df)
            swsm.set_current_subset_df(new_subset_df)
            swsm.set_current_subset_step(new_subset_step)
            
            swsw.display_df_criteria(df_title,new_subset_df)

            if(DEBUG_SUBSET) :
                print("PROCESS_SAVE_AND_GET_SUBSET",parms,fparms)
                swsm.dump_current_step()
                swsm.dump_current_sequence()

            
        elif(optionId ==  swsm.PROCESS_SAVE_SUBSET) :
            
            save_subset_run(parms,0)
            
        elif(optionId ==  swsm.PROCESS_SUBSET_SEQUENCE) :
            
            opstat  =   opStatus()
                        
            fparms      =   get_parms_for_input(parms,swsw.get_subset_sequences_input_idList)
            
            sequence    =   fparms[0]
            run_option  =   fparms[1]
            
            saved_sequence  =   swsm.get_subset_sequence(sequence)
            first_step      =   saved_sequence.get_sequence_step(0)
            
            df_title        =   first_step.get_input_subset_df_title()
            df              =   cfg.get_dfc_dataframe_df(df_title)
            
            if(df is None) :
                
                swsw.get_dfsubset_main_taskbar()
                cfg.display_data_select_df(cfg.SWDFSubsetUtility_ID)
                
                opstat.set_status(False)
                opstat.set_errorMsg("subset sequence starting df '" + df_title + "' is not currently loaded in dfc")
                display_exception(opstat)
            
            else :
                
                if(run_option == "Auto Run") :
                    
                    total_steps     =   saved_sequence.get_total_sequence_steps()
                    swsm.set_current_subset_df(df)
                    
                    for i in range(total_steps) :
                        
                        current_step        =   saved_sequence.get_sequence_step(i)
                        current_columns     =   current_step.get_col_names_list()
                        columns_action      =   current_step.get_keep_drop_flag()
                        criteria            =   current_step.get_criteria()
                        output_df_title     =   current_step.get_output_subset_df_title()
                        
                        current_df          =   swsm.get_current_subset_df()   
                        
                        if(len(current_columns) > 0) :
                            
                            colnames        =   list(current_df.columns)
                            drop_columns    =   []
                            
                            for i in range(len(colnames)) :
                                
                                if(columns_action == "Keep") :
                                    if(not (colnames[i] in current_columns)) :
                                        drop_columns.append(colnames[i])
                                else :
                                    if(colnames[i] in current_columns) :
                                        drop_columns.append(colnames[i])  
                                        
                            if(len(drop_columns) > 0 ) :
                                
                                try :
                                    current_df.drop(drop_columns, axis=1, inplace=True)   
                                except :
                                    opstat.set_status(False)
                                    opstat.set_errorMsg("Unable to drop columns from subset dataframe")
                        
                        swsm.set_current_subset_df(current_df)
                        
                        try :
                            
                            current_df         =     swsm.get_current_subset_df()
                            exec(criteria + swsm.starting_criteria_postamble)
                            current_df         =     swsm.get_current_subset_df()
                            
                        except Exception as e:
                            opstat.store_exception("Error running subset sequence '" + sequence + "'",e)

                    swsw.display_save_subset(output_df_title,swsm.get_current_subset_df(),True)    
                    
                else :
                
                    total_steps     =   saved_sequence.get_total_sequence_steps()
                    swsm.set_current_subset_df(df)
                    
                    current_step        =   saved_sequence.get_sequence_step(0)
                    current_df_title    =   current_step.get_input_subset_df_title()
                    current_columns     =   current_step.get_col_names_list()
                    columns_action      =   current_step.get_keep_drop_flag()
                    
                    swsw.display_manual_df_subset_setup(saved_sequence.get_sequence_title(),current_df_title,current_columns,columns_action,0)
                    
                    swsm.set_current_subset_step_id(0)
                    
                    swsm.set_current_subset_sequence(saved_sequence)

                     
        elif(optionId ==  swsm.PROCESS_GET_NEXT_SUBSET) :
            
            opstat      =   opStatus()
            
            fparms      =   get_parms_for_input(parms,swsw.get_manual_input_idList) 
            
            collist     =   fparms[1]
            collist     =   collist.lstrip("[")
            collist     =   collist.rstrip("]")
            collist     =   collist.split(",")
            
            keep_drop   =   fparms[3]
            
            saved_sequence      =   swsm.get_current_subset_sequence()
            
            total_steps     =   saved_sequence.get_total_sequence_steps()
            current_step_id =   swsm.get_current_subset_step_id()
            
            if(current_step_id < total_steps) :
                current_step        =   saved_sequence.get_sequence_step(swsm.get_current_subset_step_id())
            else :
                swsm.set_current_subset_step_col_names_list(collist)
                swsm.set_current_subset_keep_drop_flag(keep_drop)
                current_step        =   swsm.get_current_subset_step()
                
            swsm.dump_current_step() 
            
            swsm.set_current_subset_step(current_step)
            
            current_df_title    =   current_step.get_input_subset_df_title()

            current_df          =   swsm.get_current_subset_df()
            current_columns     =   current_step.get_col_names_list()
            columns_action      =   current_step.get_keep_drop_flag()
            criteria            =   current_step.get_criteria()
            output_df_title     =   current_step.get_output_subset_df_title()
            
            if(len(current_columns) > 0) :
                            
                colnames        =   list(current_df.columns)
                drop_columns    =   []
                            
                for i in range(len(colnames)) :
                                
                    if(columns_action == "Keep") :
                        if(not (colnames[i] in current_columns)) :
                            drop_columns.append(colnames[i])
                    else :
                        if(colnames[i] in current_columns) :
                            drop_columns.append(colnames[i])  
                                        
                if(len(drop_columns) > 0 ) :
                                
                    try :
                        current_df.drop(drop_columns, axis=1, inplace=True)   
                    except :
                        opstat.set_status(False)
                        opstat.set_errorMsg("Unable to drop columns from subset dataframe")
                        
            
            swsw.display_next_criteria(current_df_title,current_df,criteria,output_df_title)
            
            
        elif(optionId ==  swsm.PROCESS_NEXT_CRITERIA) :
            
            opstat  =   opStatus()
            
            fparms      =   get_parms_for_input(parms,swsw.get_next_criteria_input_idList)  
            
            output_df_title     =   fparms[0]
            criteria            =   fparms[2]
            
            current_sequence    =   swsm.get_current_subset_sequence()
            sequence_title      =   current_sequence.get_sequence_title()
            
            try :
                            
                current_df         =     swsm.get_current_subset_df()
                exec(criteria + swsm.starting_criteria_postamble)
                current_df         =     swsm.get_current_subset_df()
                            
            except Exception as e:
                
                opstat.store_exception("Error running subset sequence '" + sequence_title + "'",e)
                
                current_step        =   swsm.get_current_subset_step()
                current_df_title    =   current_step.get_input_subset_df_title()
                current_df          =   swsm.get_current_subset_df()
                criteria            =   current_step.get_criteria()
                output_df_title     =   current_step.get_output_subset_df_title()
                
                swsw.display_next_criteria(current_df_title,current_df,criteria,output_df_title)                
                
                display_exception(opstat)
                
            if(opstat.get_status()) :
                
                
                swsm.set_current_subset_df(current_df)
                swsm.set_current_subset_step_id(swsm.get_current_subset_step_id() + 1)
                
                if(swsm.get_current_subset_step_id() >= swsm.get_current_subset_sequence().get_total_sequence_steps()) :
                    
                    swsw.display_sequence_save_subset(output_df_title,swsm.get_current_subset_df()) 

                else :
                    
                    current_step        =   swsm.get_current_subset_sequence().get_sequence_step(swsm.get_current_subset_step_id())
                    current_df_title    =   current_step.get_input_subset_df_title()
                    current_columns     =   current_step.get_col_names_list()
                    columns_action      =   current_step.get_keep_drop_flag()
                    
                    swsw.display_manual_df_subset_setup(swsm.get_current_subset_sequence().get_sequence_title(),current_df_title,current_columns,columns_action,swsm.get_current_subset_step_id())
                    
        elif(optionId ==  swsm.DISPLAY_NEW_STEP) :  
                
            current_sequence    =   swsm.get_current_subset_sequence()
            sequence_title      =   current_sequence.get_sequence_title()
            
            current_step        =   swsm.get_current_subset_step()
            df_title            =   current_step.get_output_subset_df_title()
            current_df          =   swsm.get_current_subset_df()
            current_columns     =   []
            current_action      =   "Keep"
            criteria            =   swsm.starting_criteria
            output_df_title     =   ""
            
            current_step        =   swsm.dfc_subset_step(df_title,current_columns,current_action,criteria,output_df_title)
            swsm.set_current_subset_step(current_step)
            
            swsw.display_manual_df_subset_setup(sequence_title,df_title,current_columns,current_action,swsm.get_current_subset_step_id())
            #swsw.display_next_criteria(df_title,current_df,criteria,output_df_title)
                
        elif(optionId ==  swsm.PROCESS_SAVE_SAVED_SUBSET ) :   

            save_subset_run(parms,1)
        
        elif(optionId ==  swsm.DISPLAY_GET_REMOTE_SUBSET) :  
            
            chapterid   =   parms[0]
            
            new_config_df   =   None
            
            if(chapterid == cfg.DataInspection_ID)      :   new_config_df  =   cfg.get_config_value(cfg.CURRENT_INSPECTION_DF)
            elif(chapterid == cfg.DataCleansing_ID)     :   new_config_df  =   cfg.get_config_value(cfg.CURRENT_CLEANSE_DF)
            elif(chapterid == cfg.DataTransform_ID)     :   new_config_df  =   cfg.get_config_value(cfg.CURRENT_TRANSFORM_DF)
            elif(chapterid == cfg.DataExport_ID)        :   new_config_df  =   cfg.get_config_value(cfg.CURRENT_EXPORT_DF)
            elif(chapterid == cfg.DataImport_ID)        :   new_config_df  =   cfg.get_config_value(cfg.CURRENT_IMPORT_DF)
            elif(chapterid == cfg.SWGeocodeUtility_ID)  :   new_config_df  =   cfg.get_config_value(cfg.CURRENT_GEOCODE_DF)
            elif(chapterid == cfg.SWDFSubsetUtility_ID) :   new_config_df  =   cfg.get_config_value(cfg.CURRENT_SUBSET_DF)
            
            cfg.set_config_value(cfg.CURRENT_SUBSET_DF,new_config_df)
            
            swsm.clear_current_subset_data()
            swsw.display_df_subset_setup()
            
    else :
        
        swsw.get_dfsubset_main_taskbar()
        
        cfg.drop_config_value(cfg.CURRENT_SUBSET_DF)
        clear_sw_utility_dfsubsetdata()
        
        cfg.display_data_select_df(cfg.SWDFSubsetUtility_ID)
            
        if(not(optionId == swsm.DISPLAY_MAIN)) :
            cfg.display_no_dfs(cfg.SWDFSubsetUtility_ID)
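# --------------------------------------------------------------------------
# sketch (illustrative only) : the Keep/Drop column filtering repeated inside
# the subset steps above reduces to the rule below.  drop_add_cols is the
# dfcleanser helper actually used; this standalone version only illustrates
# the selection logic.
# --------------------------------------------------------------------------
def apply_keep_drop_sketch(df, subset_columns, columns_action="Keep"):

    # columns to remove : everything not listed when keeping,
    # everything listed when dropping
    if (columns_action == "Keep"):
        drop_columns = [col for col in df.columns if col not in subset_columns]
    else:
        drop_columns = [col for col in df.columns if col in subset_columns]

    if (len(drop_columns) > 0):
        df.drop(drop_columns, axis=1, inplace=True)

    return df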
def display_dc_pandas_export_sql_inputs(fId,
                                        dbId,
                                        dbconparms,
                                        exportparms=None):
    """
    * -------------------------------------------------------------------------- 
    * function : display pandas sql export form
    * 
    * parms :
    *   fid             -   export type
    *   dbid            -   database id
    *   dbconparms      -   db connector parms
    *   exportparms     -   export parms
    *
    * returns : N/A
    * --------------------------------------------------------
    """

    opstat = opStatus()
    opstatStatus = True
    listHtml = ""
    dbid = int(dbId)
    fid = int(fId)
    fparms = None

    if (fid == 0):

        dbid = cfg.get_config_value(cfg.CURRENT_DB_ID_KEY)
        from dfcleanser.data_import.data_import_widgets import (
            get_table_names, TABLE_NAMES, get_rows_html)
        tablelist = get_table_names(dbid, opstat)
        listHtml = get_rows_html(tablelist, TABLE_NAMES, True)

    elif (fid == 1):

        fparms = get_parms_for_input(exportparms,
                                     pandas_export_sqltable_idList)
        dbid = cfg.get_config_value(cfg.CURRENT_DB_ID_KEY)
        cfg.set_config_value(pandas_export_sqltable_id + "Parms", fparms)

        if (len(fparms[0]) > 0):
            from dfcleanser.data_import.data_import_widgets import (
                get_column_names, get_rows_html)
            from dfcleanser.data_import.data_import_model import COLUMN_NAMES
            columnlist = get_column_names(dbid, fparms[1], opstat)
            listHtml = get_rows_html(columnlist, COLUMN_NAMES, True)
        else:
            opstat.set_status(False)
            opstat.set_errorMsg("No Table Selected")

    elif (fid == 2):

        cfg.set_config_value(cfg.CURRENT_DB_ID_KEY, dbid)

        dbcondict = {}
        if (not (dbconparms == None)):
            parse_connector_parms(dbconparms, dbid, dbcondict)
        else:
            conparms = get_stored_con_Parms(dbid)
            parse_connector_parms(conparms, dbid, dbcondict)

        listHtml = get_db_connector_list(dbid, dbcondict)

    if (not (opstat.get_status())):
        dbcondict = {}
        conparms = get_stored_con_Parms(dbid)
        parse_connector_parms(conparms, dbid, dbcondict)

        listHtml = get_db_connector_list(dbid, dbcondict)

        opstatStatus = opstat.get_status()
        opstatErrormsg = opstat.get_errorMsg()
        opstat.set_status(True)

    if (opstat.get_status()):

        export_sql_input_form = InputForm(
            pandas_export_sqltable_id, pandas_export_sqltable_idList,
            pandas_export_sqltable_labelList, pandas_export_sqltable_typeList,
            pandas_export_sqltable_placeholderList,
            pandas_export_sqltable_jsList, pandas_export_sqltable_reqList)

        selectDicts = []
        df_list = cfg.get_dfc_dataframes_select_list(cfg.DataExport_ID)
        selectDicts.append(df_list)

        exists = {"default": "fail", "list": ["fail", "replace", "append"]}
        selectDicts.append(exists)
        index = {"default": "True", "list": ["True", "False"]}
        selectDicts.append(index)

        get_select_defaults(export_sql_input_form, pandas_export_sqltable_id,
                            pandas_export_sqltable_idList,
                            pandas_export_sqltable_typeList, selectDicts)

        export_sql_input_form.set_shortForm(False)
        export_sql_input_form.set_gridwidth(680)
        export_sql_input_form.set_custombwidth(125)
        export_sql_input_form.set_fullparms(True)

        export_sql_input_html = ""
        export_sql_input_html = export_sql_input_form.get_html()

        export_sql_heading_html = "<div>" + get_pandas_export_input_title(
            dem.SQLTABLE_EXPORT, dbid) + "</div><br>"

        if (not (exportparms == None)):
            cfg.set_config_value(pandas_export_sqltable_id + "Parms", fparms)

        gridclasses = [
            "dfcleanser-common-grid-header", "dfc-left", "dfc-right"
        ]
        gridhtmls = [export_sql_heading_html, listHtml, export_sql_input_html]

        display_generic_grid("data-import-sql-table-wrapper", gridclasses,
                             gridhtmls)

    if (not (opstatStatus)):
        opstat.set_status(opstatStatus)
        opstat.set_errorMsg(opstatErrormsg)
        display_exception(opstat)
def display_dc_export_forms(exid, detid=0, notes=False):
    """
    * -------------------------------------------------------------------------- 
    * function : display pandas export input forms
    * 
    * parms :
    *   exid    -   export type
    *   detid   -   detail id
    *   notes   -   notes flag
    *
    * returns : N/A
    * --------------------------------------------------------
    """

    clear_output()

    # add the main import task bar
    if (exid == dem.EXPORT_TB_ONLY):

        display_export_main_taskbar()
        cfg.display_data_select_df(cfg.DataExport_ID)

    else:

        if (cfg.is_a_dfc_dataframe_loaded()):

            # add the pandas import task bar or pandas details form
            if ((exid == dem.EXPORT_PANDAS_TB_ONLY)
                    or (exid == dem.EXPORT_PANDAS_TB_PLUS_DETAILS)):

                # add the pandas export details form
                if (exid == dem.EXPORT_PANDAS_TB_PLUS_DETAILS):

                    if (detid == dem.SQLTABLE_EXPORT):

                        import dfcleanser.common.db_utils as dbutils

                        cfg.drop_config_value(pandas_export_sqltable_id +
                                              "Parms")

                        dbid = cfg.get_config_value(cfg.CURRENT_DB_ID_KEY)

                        if (dbid == None):
                            cfg.set_config_value(cfg.CURRENT_DB_ID_KEY,
                                                 dbutils.MySql)
                            conparms = cfg.get_config_value(
                                dbutils.MYSQL_DBCON_PARMS)
                            if (conparms == None):
                                conparms = [
                                    "", "", "", "", dbutils.pymysql_library
                                ]

                        elif (dbid == dbutils.MySql):
                            conparms = cfg.get_config_value(
                                dbutils.MYSQL_DBCON_PARMS)
                            if (conparms == None):
                                conparms = [
                                    "", "", "", "", dbutils.pymysql_library
                                ]

                        elif (dbid == dbutils.MS_SQL_Server):
                            conparms = cfg.get_config_value(
                                dbutils.MSSQL_DBCON_PARMS)
                            if (conparms == None):
                                conparms = [
                                    "", "", "", "", dbutils.pyodbc_library
                                ]

                        elif (dbid == dbutils.SQLite):
                            conparms = cfg.get_config_value(
                                dbutils.SQLITE_DBCON_PARMS)
                            if (conparms == None):
                                conparms = ["", dbutils.sqlite3_library]

                        elif (dbid == dbutils.Postgresql):
                            conparms = cfg.get_config_value(
                                dbutils.POSTGRESQL_DBCON_PARMS)
                            if (conparms == None):
                                conparms = [
                                    "", "", "", "", dbutils.psycopg2_library
                                ]

                        elif (dbid == dbutils.Oracle):
                            conparms = cfg.get_config_value(
                                dbutils.ORACLE_DBCON_PARMS)
                            if (conparms == None):
                                conparms = [
                                    "", "", "", dbutils.cx_oracle_library
                                ]

                        elif (dbid == dbutils.Custom):
                            conparms = cfg.get_config_value(
                                dbutils.CUSTOM_DBCON_PARMS)
                            if (conparms == None): conparms = [""]

                        dbutils.display_db_connector_inputs(
                            cfg.get_config_value(cfg.CURRENT_DB_ID_KEY),
                            conparms, dbutils.SQL_EXPORT)

                    else:

                        display_export_main_taskbar()

                        pandas_export_form = get_pandas_export_input_form(
                            detid)

                        if (cfg.get_dfc_mode() == cfg.INLINE_MODE):
                            pandas_export_form.set_shortForm(True)
                            pandas_export_form.set_gridwidth(640)
                            pandas_export_form.set_custombwidth(110)
                        else:
                            pandas_export_form.set_gridwidth(480)
                            pandas_export_form.set_custombwidth(100)

                        pandas_input_html = ""
                        pandas_input_html = pandas_export_form.get_html()

                        pandas_input_heading_html = "<div>" + get_pandas_export_input_title(
                            detid) + "</div>"

                        gridclasses = [
                            "dfcleanser-common-grid-header", "dfc-footer"
                        ]
                        gridhtmls = [
                            pandas_input_heading_html, pandas_input_html
                        ]

                        if (cfg.get_dfc_mode() == cfg.INLINE_MODE):
                            display_generic_grid("data-import-wrapper",
                                                 gridclasses, gridhtmls)
                        else:
                            display_generic_grid("data-import-pop-up-wrapper",
                                                 gridclasses, gridhtmls)

                else:
                    display_pandas_export_taskbar()

            elif (exid == dem.EXPORT_CUSTOM_ONLY):

                print("dem.EXPORT_PANDAS_TB_ONLY")

                # add the custom import form
                exportCustomDetailsForm = InputForm(
                    custom_export_id, custom_export_idList,
                    custom_export_labelList, custom_export_typeList,
                    custom_export_placeholderList, custom_export_jsList,
                    custom_export_reqList)

                if (notes):
                    customNotes = [
                        "To create custom export code in the code cell below hit 'New Custom Export'",
                        "&nbsp;&nbsp;&nbsp;&nbsp;(enter and test export in the code cell below)",
                        "&nbsp;&nbsp;&nbsp;&nbsp;(leave the '# custom export' comment line in the code cell",
                        "&nbsp;&nbsp;&nbsp;&nbsp;(call dfcleanser.common.cfg.get_dfc_dataframe_df() to get the current dataframe)",
                        "To run the export code in the Custom Export Code box hit 'Run Custom Export' button",
                        "&nbsp;&nbsp;&nbsp;&nbsp;(only the code in the Custom Export Code box is run and stored for scripting)",
                        "Once import successful hit 'Save Custom Import' button to store import code for future retrieval",
                        "To drop the custom export code and clear the Custom Export Code box hit 'Drop Custom Export' button"
                    ]

                    print("\n")
                    display_inline_help(customNotes, 92)

                selectDicts = []

                df_list = cfg.get_dfc_dataframes_select_list(cfg.DataExport_ID)
                selectDicts.append(df_list)

                flags = {"default": "False", "list": ["True", "False"]}
                selectDicts.append(flags)

                get_select_defaults(exportCustomDetailsForm, custom_export_id,
                                    custom_export_idList,
                                    custom_export_typeList, selectDicts)

                if (cfg.get_dfc_mode() == cfg.INLINE_MODE):
                    exportCustomDetailsForm.set_shortForm(True)
                    exportCustomDetailsForm.set_gridwidth(640)
                    exportCustomDetailsForm.set_custombwidth(110)
                else:
                    exportCustomDetailsForm.set_gridwidth(480)
                    exportCustomDetailsForm.set_custombwidth(100)

                exportCustomDetailsForm.set_fullparms(True)

                from dfcleanser.common.html_widgets import new_line
                custom_code = "# add USER CODE to export the df" + new_line + new_line
                custom_code = (
                    custom_code +
                    "from dfcleanser.common.cfg import get_dfc_dataframe_df" +
                    new_line)
                custom_code = (custom_code +
                               "df = get_dfc_dataframe_df(dataframe_title)" +
                               new_line + new_line)
                custom_code = (custom_code + "# USER CODE" + new_line)

                cfg.set_config_value(custom_export_id + "Parms",
                                     ["", custom_code, ""])

                custom_export_html = ""
                custom_export_html = exportCustomDetailsForm.get_html()

                custom_export_heading_html = "<div>Custom Export</div><br>"

                gridclasses = ["dfcleanser-common-grid-header", "dfc-footer"]
                gridhtmls = [custom_export_heading_html, custom_export_html]

                if (cfg.get_dfc_mode() == cfg.INLINE_MODE):
                    display_generic_grid("data-import-wrapper", gridclasses,
                                         gridhtmls)
                else:
                    display_generic_grid("data-import-pop-up-wrapper",
                                         gridclasses, gridhtmls)

        else:

            display_export_main_taskbar()
            cfg.display_data_select_df(cfg.DataExport_ID)
            cfg.display_no_dfs(cfg.DataExport_ID)
def display_export_dc_sql_details_forms(dblibid):
    """
    * -------------------------------------------------------------------------- 
    * function : display export sql form
    * 
    * parms :
    *   dblibid   -   db lib id
    *
    * returns : N/A
    * --------------------------------------------------------
    """

    import dfcleanser.common.db_utils as dbutils

    if (dblibid == None):
        cfg.drop_config_value(cfg.CURRENT_DB_ID_KEY)

    if ((dblibid == dbutils.pymysql_library)
            or (dblibid == dbutils.mysql_connector_library)):
        cfg.set_config_value(cfg.CURRENT_DB_ID_KEY, dbutils.MySql)
    elif ((dblibid == dbutils.pyodbc_library)
          or (dblibid == dbutils.pymssql_library)):
        cfg.set_config_value(cfg.CURRENT_DB_ID_KEY, dbutils.MS_SQL_Server)
    elif ((dblibid == dbutils.sqlite_library)
          or (dblibid == dbutils.sqlite3_library)):
        cfg.set_config_value(cfg.CURRENT_DB_ID_KEY, dbutils.SQLite)
    elif (dblibid == dbutils.psycopg2_library):
        cfg.set_config_value(cfg.CURRENT_DB_ID_KEY, dbutils.Postgresql)
    elif (dblibid == dbutils.cx_oracle_library):
        cfg.set_config_value(cfg.CURRENT_DB_ID_KEY, dbutils.Oracle)
    elif (dblibid == "custom"):
        cfg.set_config_value(cfg.CURRENT_DB_ID_KEY, dbutils.Custom)

    dbid = cfg.get_config_value(cfg.CURRENT_DB_ID_KEY)

    if (dbid == None):
        inparms = cfg.get_config_value(cfg.MYSQL_IMPORT_PARMS_KEY)
        if (inparms == None):
            inparms = ["", "", "", "", dblibid]
        else:
            inparms[4] = dblibid

    elif (dbid == dbutils.MySql):
        inparms = cfg.get_config_value(cfg.MYSQL_IMPORT_PARMS_KEY)
        if (inparms == None):
            inparms = ["", "", "", "", dblibid]
        else:
            inparms[4] = dblibid

    elif (dbid == dbutils.MS_SQL_Server):
        inparms = cfg.get_config_value(cfg.MSSQL_IMPORT_PARMS_KEY)
        if (inparms == None):
            inparms = ["", "", "", "", dblibid]
        else:
            inparms[4] = dblibid

    elif (dbid == dbutils.Postgresql):
        inparms = cfg.get_config_value(cfg.POSTGRESQL_IMPORT_PARMS_KEY)
        if (inparms == None):
            inparms = ["", "", "", "", dblibid]
        else:
            inparms[4] = dblibid

    elif (dbid == dbutils.SQLite):
        inparms = cfg.get_config_value(cfg.SQLITE_IMPORT_PARMS_KEY)
        if (inparms == None):
            inparms = ["", dblibid]
        else:
            inparms[1] = dblibid

    elif (dbid == dbutils.Oracle):
        inparms = cfg.get_config_value(cfg.ORACLE_IMPORT_PARMS_KEY)
        if (inparms == None):
            inparms = ["", "", "", dblibid]
        else:
            inparms[3] = dblibid

    elif (dbid == dbutils.Custom):
        inparms = cfg.get_config_value(cfg.CUSTOM_IMPORT_PARMS_KEY)

    from dfcleanser.common.db_utils import display_db_connector_inputs, SQL_EXPORT
    display_db_connector_inputs(cfg.get_config_value(cfg.CURRENT_DB_ID_KEY),
                                inparms, SQL_EXPORT)