コード例 #1
0
def display_inspect_nans():
    """
    Render the "inspect NaNs" view: build the row- and column-oriented
    NaN summary tables and hand them to the null-data display widget.

    Returns:
        None
    """

    cols_table = dcTable("Columns with most NaNs", "nansTable",
                         cfg.DataInspection_ID)
    rows_table = dcTable("Rows with most NaNs", "nansrowTable",
                         cfg.DataInspection_ID)

    current_df = cfg.get_current_chapter_df(cfg.DataInspection_ID)
    diw.display_null_data(current_df, rows_table, cols_table, 120)
コード例 #2
0
def process_county_cities(parms):
    """
    Display all cities belonging to a county as a dfcleanser row-major
    table inside the standard display grid.

    parms : raw widget input parameters for the county/cities form
    """

    opstat = opStatus()

    fparms = get_parms_for_input(parms, suzw.county_cities_input_idList)
    state = fparms[0][:2]
    county = fparms[1]

    cfg.set_config_value(suzw.county_cities_input_id + "Parms", fparms)

    suzw.display_get_cities_for_county(parms)

    print("\n")

    from dfcleanser.common.table_widgets import dcTable, get_row_major_table, ROW_MAJOR, SCROLL_DOWN

    table_header = [""]
    table_rows = []
    table_widths = [20, 80]
    table_aligns = ["left", "left"]

    found_cities = suzm.get_cities_for_county(state,
                                              county,
                                              city_type=suzm.ANY_CITY_TYPE)

    if found_cities is not None:
        table_rows.append(["US Zipcode Cities", str(found_cities)])

    table_title = ("Cities For " + str(county) + " - " +
                   str(suzm.get_state_name(state).upper()))

    cities_table = dcTable(table_title, 'citiescodesid',
                           cfg.SWZipcodeUtility_ID, table_header, table_rows,
                           table_widths, table_aligns)

    cities_table.set_small(True)
    cities_table.set_checkLength(False)
    cities_table.set_border(True)
    cities_table.set_tabletype(ROW_MAJOR)
    cities_table.set_rowspertable(50)

    cities_html = get_row_major_table(cities_table, SCROLL_DOWN, False)

    display_generic_grid("display-geocode-coords-wrapper", ["dfc-top"],
                         [cities_html])
コード例 #3
0
def display_inspect_categories():
    """
    Render the "inspect categories" view: show the category columns and
    the category-candidate columns for the current inspection dataframe.

    Returns:
        None
    """

    opstat = opStatus()

    clock = RunningClock()
    clock.start()

    try:
        category_table = dcTable("Category Columns", "catcolsTable",
                                 cfg.DataInspection_ID)
        candidate_table = dcTable("Category Candidate Columns",
                                  "catcandcolsTable", cfg.DataInspection_ID)

        # counts are returned by the widget but not used here
        numcats, numcands = diw.display_df_categories(
            cfg.get_current_chapter_df(cfg.DataInspection_ID),
            category_table, candidate_table)

    except Exception as e:
        opstat.store_exception("Error displaying category data\n ", e)

    clock.stop()

    if not opstat.get_status():
        display_exception(opstat)
コード例 #4
0
def get_df_stats_table(df_title, df, small=False):
    """
    * -------------------------------------------------------------------------- 
    * function : build the summary stats html table for a dataframe
    * 
    * parms :
    *  df_title -   dataframe title shown in the table caption
    *  df       -   dataframe to summarize
    *  small    -   if True use the narrow-display margin
    *
    * returns :
    *  html string for the stats table (with a trailing <br>)
    * --------------------------------------------------------
    """

    colstatsHeader = []
    colstatsRows = []
    colstatsWidths = [40, 60]
    colstatsAligns = ["left", "left"]

    colstatsRows.append(["Number Of Rows", str(len(df))])
    colstatsRows.append(["Number Of Columns", str(len(df.columns))])

    colstats_table = dcTable("df Stats For dataframe '" + str(df_title) + "'",
                             'dfcolstatsid', cfg.SWDFSubsetUtility_ID,
                             colstatsHeader, colstatsRows, colstatsWidths,
                             colstatsAligns)

    colstats_table.set_small(True)

    # the width is the same in both layouts; only the margin differs
    colstats_table.set_smallwidth(50)
    colstats_table.set_smallmargin(260 if small else 160)

    colstats_table.set_checkLength(False)

    colstats_table.set_border(True)
    colstats_table.set_tabletype(ROW_MAJOR)
    colstats_table.set_rowspertable(50)
    colstatsHtml = get_row_major_table(colstats_table, SCROLL_DOWN, False)

    return (colstatsHtml + "<br>")
コード例 #5
0
def display_inspect_datatypes(option, df_data_info):
    """
    * -------------------------------------------------------------------------- 
    * function : display the datatypes option and a bar chart of type counts
    * 
    * parms :
    *  option        -   datatypes display option
    *  df_data_info  -   list whose [0] is the column dtypes, [1] the per-dtype
    *                    counts, [2] additional column info for the widget
    *
    * returns : 
    *  N/A
    * --------------------------------------------------------
    """

    opstat = opStatus()

    import matplotlib.pyplot as plt

    clock = RunningClock()
    clock.start()

    try:

        # the full-column-names option renders without a summary table
        if (option == dim.DISPLAY_FULL_COLUMN_NAMES):
            data_types_table = None
        else:
            data_types_table = dcTable("Column Data Types", "datatypesTable",
                                       cfg.DataInspection_ID)

        data_types_html = diw.display_df_datatypes(data_types_table,
                                                   df_data_info[0],
                                                   df_data_info[1],
                                                   df_data_info[2], option,
                                                   False)

        gridclasses = ["dfc-main"]
        gridhtmls = [data_types_html]

        if (cfg.get_dfc_mode() == cfg.INLINE_MODE):
            display_generic_grid("df-inspection-wrapper", gridclasses,
                                 gridhtmls)
        else:
            display_generic_grid("df-inspection-pop-up-wrapper", gridclasses,
                                 gridhtmls)

        print("\n")

        import numpy as np

        font = {'fontsize': 14}
        font2 = {'fontsize': 18}

        # strip the "datetime." prefix so the tick labels stay compact
        objects = [str(dt).replace("datetime.", "") for dt in df_data_info[0]]

        y_pos = np.arange(len(objects))

        plt.bar(y_pos,
                df_data_info[1],
                align='center',
                alpha=0.5,
                color='#428bca')
        plt.xticks(y_pos, objects, rotation='vertical')
        plt.ylabel('Type Counts', fontdict=font)
        plt.xlabel('Data Types', fontdict=font)
        plt.title('Column Data Types', fontdict=font2)

        plt.show()

    except Exception as e:
        opstat.store_exception("Error displaying data types\n ", e)

    clock.stop()

    if (not (opstat.get_status())):
        display_exception(opstat)
コード例 #6
0
def process_zipcode_attributes(parms):
    """
    Display the attributes of a zipcode (active status, cities, county,
    state, lat/long, zipcode type, area codes) as a dfcleanser row-major
    table inside the standard display grid.

    parms : raw widget input parameters for the zipcode-attributes form
    """

    DEBUG_PROC_ZC_ATTRS = False

    opstat = opStatus()

    fparms = get_parms_for_input(parms, suzw.zipcode_atributes_input_idList)
    zipcode = fparms[0]

    cfg.set_config_value(suzw.zipcode_atributes_input_id + "Parms", fparms)

    suzw.display_get_zipcode_attributes(parms)

    print("\n")

    zipattrsHeader = [""]
    zipattrsRows = []
    zipattrsWidths = [30, 70]
    zipattrsAligns = ["left", "left"]

    primary_cities = suzm.get_cities_for_zipcode(
        zipcode, city_type=suzm.PRIMARY_CITY_TYPE)
    acceptable_cities = suzm.get_cities_for_zipcode(
        zipcode, city_type=suzm.ACCEPTABLE_CITY_TYPE)
    not_acceptable_cities = suzm.get_cities_for_zipcode(
        zipcode, city_type=suzm.NOT_ACCEPTABLE_CITY_TYPE)

    zipcode_county = suzm.get_county_for_zipcode(zipcode)
    zipcode_state = suzm.get_state_for_zipcode(zipcode)

    zipcode_latitude = suzm.get_latitude_for_zipcode(zipcode)
    zipcode_longitude = suzm.get_longitude_for_zipcode(zipcode)

    zipcode_areacodes = suzm.get_areacodes_for_zipcode(zipcode)

    zipcode_active_status = suzm.is_zipcode_active(zipcode)

    zipcode_type = suzm.get_type_for_zipcode(zipcode)

    # useful test zipcodes :
    #   501 603 604 8720 8732 8753 9001 9203 9204 9213 11708
    if (DEBUG_PROC_ZC_ATTRS):

        print("primary_cities", primary_cities)
        if (not (acceptable_cities is None)):
            print("acceptable_cities", len(acceptable_cities),
                  acceptable_cities)
        else:
            print("acceptable_cities", acceptable_cities)

        if (not (not_acceptable_cities is None)):
            print("not_acceptable_cities", len(not_acceptable_cities),
                  not_acceptable_cities)
        else:
            print("not_acceptable_cities", not_acceptable_cities)

        print("zipcode_county", zipcode_county)
        print("zipcode_state", zipcode_state)
        print("zipcode_latitude", type(zipcode_latitude), zipcode_latitude)
        print("zipcode_longitude", type(zipcode_longitude), zipcode_longitude)

        print("zipcode_areacodes", type(zipcode_areacodes), zipcode_areacodes)
        print("zipcode_active_status", zipcode_active_status)
        print("zipcode_type", zipcode_type)

    # a zipcode with no active status and no primary city is not a
    # valid zipcode at all
    if ((not (zipcode_active_status)) and (primary_cities is None)):

        zipattrsRows.append(["Current Status", "Zipcode is Invalid"])

    else:

        if (zipcode_active_status):
            zipattrsRows.append(["Current Status", "Active"])
        else:
            # fixed typo : was "Decommissioined"
            zipattrsRows.append(["Current Status", "Decommissioned"])

        if (primary_cities is None):

            zipattrsRows.append(["Primary City", "None"])

        else:

            # military (APO/FPO/DPO) zipcodes are all routed through DC
            if ((primary_cities == suzm.APO_ZIPCODE_TYPE)
                    or (primary_cities == suzm.FPO_ZIPCODE_TYPE)
                    or (primary_cities == suzm.DPO_ZIPCODE_TYPE)):

                zipattrsRows.append(["Primary City", "Washington DC"])

            else:
                zipattrsRows.append(["Primary City", str(primary_cities)])

        if (zipcode_county is None):
            zipattrsRows.append(["County", "None"])
        else:
            zipattrsRows.append(["County", str(zipcode_county)])

        if (zipcode_state is None):
            zipattrsRows.append(["State", "None"])
        else:
            zipattrsRows.append(["State", str(zipcode_state)])

        if ((zipcode_latitude is None) or (zipcode_longitude is None)
                or (numpy.isnan(zipcode_latitude))
                or (numpy.isnan(zipcode_longitude))):
            zipattrsRows.append(["[Latitude,Longitude]", "Unknown"])
        else:
            zipattrsRows.append([
                "[Latitude,Longitude]", "[" + str(round(zipcode_latitude, 7)) +
                " , " + str(round(zipcode_longitude, 7)) + "]"
            ])

        if (zipcode_type is None):
            zipattrsRows.append(["Zipcode Type", "None"])
        else:

            if (zipcode_type == suzm.UNIQUE_ZIPCODE_TYPE):
                zipattrsRows.append(["Zipcode Type", str(suzm.UNIQUE_text)])
            elif (zipcode_type == suzm.STANDARD_ZIPCODE_TYPE):
                zipattrsRows.append(["Zipcode Type", str(suzm.STANDARD_text)])
            elif (zipcode_type == suzm.PO_BOX_ZIPCODE_TYPE):
                zipattrsRows.append(["Zipcode Type", str(suzm.PO_BOX_text)])
            elif (zipcode_type == suzm.APO_ZIPCODE_TYPE):
                zipattrsRows.append(["Zipcode Type", str(suzm.APO_text)])
            elif (zipcode_type == suzm.FPO_ZIPCODE_TYPE):
                zipattrsRows.append(["Zipcode Type", str(suzm.FPO_text)])
            elif (zipcode_type == suzm.DPO_ZIPCODE_TYPE):
                zipattrsRows.append(["Zipcode Type", str(suzm.DPO_text)])
            else:
                zipattrsRows.append(["Zipcode Type", "Unknown"])

        if (not (zipcode_areacodes is None)):

            # strip embedded quotes from each area code string
            if (isinstance(zipcode_areacodes, list)):
                for i in range(len(zipcode_areacodes)):
                    zipcode_areacodes[i] = zipcode_areacodes[i].replace(
                        "'", "")

            zipattrsRows.append(["Area Codes", str(zipcode_areacodes)])

        if (not (acceptable_cities is None)):
            if (len(acceptable_cities) > 0):
                zipattrsRows.append(
                    ["Acceptable Cities",
                     str(acceptable_cities)])

        if (not (not_acceptable_cities is None)):
            if (len(not_acceptable_cities) > 0):
                zipattrsRows.append(
                    ["Not Acceptable Cities",
                     str(not_acceptable_cities)])

    from dfcleanser.common.table_widgets import dcTable, get_row_major_table, ROW_MAJOR, SCROLL_DOWN

    zipattrs_table = dcTable("Zipcode " + str(zipcode) + " Properties",
                             'zipcodeattrsid', cfg.SWZipcodeUtility_ID,
                             zipattrsHeader, zipattrsRows, zipattrsWidths,
                             zipattrsAligns)

    zipattrs_table.set_small(True)
    zipattrs_table.set_checkLength(False)

    zipattrs_table.set_border(True)
    zipattrs_table.set_tabletype(ROW_MAJOR)
    zipattrs_table.set_rowspertable(50)
    zipattrsHtml = get_row_major_table(zipattrs_table, SCROLL_DOWN, False)

    gridclasses = ["dfc-top"]
    gridhtmls = [zipattrsHtml]

    display_generic_grid("display-geocode-coords-wrapper", gridclasses,
                         gridhtmls)
コード例 #7
0
def process_zipcode_cities(parms):
    """
    Display the zipcodes belonging to a city, broken out by zipcode type
    (standard, PO box, unique) plus decommissioned zipcodes, rendered as
    a dfcleanser row-major table.

    parms : raw widget input parameters for the city/zipcodes form
    """

    opstat = opStatus()

    fparms = get_parms_for_input(parms, suzw.zipcode_cities_input_idList)
    city = fparms[0]
    state = fparms[1][:2]

    cfg.set_config_value(suzw.zipcode_cities_input_id + "Parms", fparms)

    suzw.display_get_zips_for_city(parms)

    print("\n")

    from dfcleanser.common.table_widgets import dcTable, get_row_major_table, ROW_MAJOR, SCROLL_DOWN

    zip_rows = []

    # (row label, zipcode type, active status) for each lookup, in
    # the order the rows should appear in the table
    lookups = [
        (suzm.STANDARD_text, suzm.STANDARD_ZIPCODE_TYPE,
         suzm.ACTIVE_STATUS_TYPE),
        (suzm.PO_BOX_text, suzm.PO_BOX_ZIPCODE_TYPE,
         suzm.ACTIVE_STATUS_TYPE),
        (suzm.UNIQUE_text, suzm.UNIQUE_ZIPCODE_TYPE,
         suzm.ACTIVE_STATUS_TYPE),
        ("Decommissioned", suzm.ANY_ZIPCODE_TYPE,
         suzm.DECOMMISIONED_STATUS_TYPE),
    ]

    for label, zip_type, status in lookups:
        matches = suzm.get_zipcodes_for_city(state,
                                             city,
                                             zipcode_type=zip_type,
                                             active_status=status)
        if matches is not None:
            zip_rows.append([label + " Zipcodes", str(matches)])

    zips_table = dcTable(
        "Zipcodes For " + str(city.upper()) + ", " + str(state),
        'cityzipcodesid', cfg.SWZipcodeUtility_ID, [""], zip_rows,
        [30, 70], ["left", "left"])

    zips_table.set_small(True)
    zips_table.set_checkLength(False)
    zips_table.set_border(True)
    zips_table.set_tabletype(ROW_MAJOR)
    zips_table.set_rowspertable(50)

    zips_html = get_row_major_table(zips_table, SCROLL_DOWN, False)

    display_generic_grid("display-geocode-coords-wrapper", ["dfc-top"],
                         [zips_html])
コード例 #8
0
def get_column_stats_table(df_title, df, small=False):
    """
    * --------------------------------------------------------------------------
    * function : build the per-column stats html table for a dataframe
    * 
    * parms :
    *  df_title -   dataframe title shown in the table caption
    *  df       -   dataframe to summarize
    *  small    -   if True use the narrow-display width/margin
    *
    * returns :
    *  html string for the column stats table (with a trailing <br>)
    * --------------------------------------------------------
    """

    colstatsHeader = ["Column Name", "Dtype", "Max", "Min", "Num Uniques"]
    colstatsRows = []
    colstatsWidths = [30, 15, 20, 20, 15]
    colstatsAligns = ["left", "left", "left", "left", "center"]

    rowColors = []

    index_columns = df.index.names

    # named index levels are listed first and highlighted.
    # BUGFIX : only a MultiIndex exposes .levels - for a plain named
    # index the index object itself carries the values, so accessing
    # df.index.levels there raised AttributeError.
    for i in range(len(index_columns)):
        if (index_columns[i] is None):
            continue

        if (hasattr(df.index, "levels")):
            level = df.index.levels[i]
        else:
            level = df.index

        colstatsRows.append([
            index_columns[i],
            str(level.dtype),
            str(level.max()),
            str(level.min()),
            str(len(level.unique()))
        ])

        rowColors.append("#ffffcc")

    columns = list(df.columns)

    for i in range(len(columns)):

        col_dtype = df[columns[i]].dtype

        # max/min are only meaningful for numeric columns
        if (is_numeric_col(df, columns[i])):
            col_max = str(df[columns[i]].max())
            col_min = str(df[columns[i]].min())
        else:
            col_max = ""
            col_min = ""

        col_unique_count = str(len(df[columns[i]].unique()))

        colstatsRows.append(
            [columns[i],
             str(col_dtype), col_max, col_min, col_unique_count])

    from dfcleanser.common.table_widgets import dcTable, get_row_major_table, ROW_MAJOR, SCROLL_DOWN

    colstats_table = dcTable(
        "Column Stats For dataframe '" + str(df_title) + "'", 'colstatsid',
        cfg.SWDFSubsetUtility_ID, colstatsHeader, colstatsRows, colstatsWidths,
        colstatsAligns)

    colstats_table.set_small(True)

    # highlight only applies when index rows were emitted
    if (len(rowColors) > 0):
        colstats_table.set_row_color_list(rowColors)

    if (small):
        colstats_table.set_smallwidth(60)
        colstats_table.set_smallmargin(200)
    else:
        colstats_table.set_smallwidth(90)
        colstats_table.set_smallmargin(30)

    colstats_table.set_checkLength(True)

    colstats_table.set_border(True)
    colstats_table.set_tabletype(ROW_MAJOR)
    colstats_table.set_rowspertable(50)
    colstatsHtml = get_row_major_table(colstats_table, SCROLL_DOWN, False)

    return (colstatsHtml + "<br>")
コード例 #9
0
def process_df_transform(optionid,parms,display=True) :
    """
    * -------------------------------------------------------------------------- 
    * function : process dataframe transform option
    * 
    * parms :
    *   optionid  -   transform option
    *   parms     -   transform parms
    *   display   -   display flag
    *
    * returns : 
    *  N/A
    * --------------------------------------------------------
    """
    
    opstat  =   opStatus()
    
    #dftw.display_dataframe_transform_taskbar()
    
    if(optionid == dtm.PROCESS_SHOW_COLUMN_NAMES_ROW) :
        
        dftw.display_dataframe_col_names_taskbar()
        
        print("\n")
        col_names_table = dcTable("Column Names ","cnamesTable",cfg.DataTransform_ID)
        col_names_table.set_table_column_parms({"font":12})
        col_names_table.set_note("None")
        display_column_names(cfg.get_current_chapter_df(cfg.DataTransform_ID),col_names_table,None)  


    if(optionid == dtm.PROCESS_SAVE_COLUMN_NAMES_ROW) :
        
        [opstat, filename]  =   save_column_names_row(parms)
        
        dftw.display_dataframe_col_names_taskbar()

        if(opstat.get_status()) :
            display_status_note("Column Names Row Saved Successfully to : " + filename) 
            clear_dataframe_transform_cfg_values()
        else :
            display_exception(opstat)

        
    # add column names row
    elif(optionid == dtm.PROCESS_ADD_COLUMN_NAMES_ROW) :
    
        opstat     =   add_column_names_row(parms) 
        
        dftw.display_dataframe_col_names_taskbar()
        print("\n")
        
        if(opstat.get_status()) :

            clear_dataframe_transform_cfg_values()
            display_status_note("Column Names Row Added Successfully")
            
            col_names_table = dcTable("Column Names ","cnamesTable",cfg.DataTransform_ID)
            col_names_table.set_table_column_parms({"font":12})
            col_names_table.set_note("None")
            display_column_names(cfg.get_current_chapter_df(cfg.CURRENT_TRANSFORM_DF),col_names_table,None)    
                
        else :
                    
            display_main_option([[0,0]])
            display_exception(opstat)

    
    elif(optionid == dtm.PROCESS_CHANGE_COLUMN_NAMES) :
        
        opstat = change_column_names(parms)
        
        dftw.display_dataframe_col_names_taskbar()
        print("\n")
        
        if(opstat.get_status()) :
                
            clear_dataframe_transform_cfg_values()
            display_status_note("Column Names Changed Successfully")
            
            col_names_table = dcTable("Column Names ","cnamesTable",cfg.DataTransform_ID)
            col_names_table.set_table_column_parms({"font":12})
            col_names_table.set_note("None")
            display_column_names(cfg.get_current_chapter_df(cfg.CURRENT_TRANSFORM_DF),col_names_table,None)    
                
        else :
            display_exception(opstat)


    if(optionid == dtm.PROCESS_DROP_COLUMN_NAMES_ROW) :
        
        opstat      =   drop_column_names_row()
        
        dftw.display_dataframe_col_names_taskbar()
        print("\n")
            
        if(opstat.get_status()) :
            display_status_note("Column Names Row Dropped Successfully")
            clear_dataframe_transform_cfg_values()
        else :
            display_exception(opstat)

            
    if(optionid == dtm.PROCESS_WHITESPACE_COLUMN_NAMES) :
        
        opstat      =   remwhitespace_column_names_row(parms)
        
        dftw.display_dataframe_col_names_taskbar()
        print("\n")
            
        if(opstat.get_status()) :
            display_status_note("Column Names Whitespace Removed Successfully")
            clear_dataframe_transform_cfg_values()
        else :
            display_exception(opstat)
        
    
    elif(optionid == dtm.PROCESS_SET_DF_INDEX) :
        
        opstat = set_df_index(parms) 
        
        dftw.display_dataframe_indices_taskbar()
        print("\n")
                
        if(opstat.get_status()) :
            clear_dataframe_transform_cfg_values()
            display_status_note("df Index Set Successfully")
        else :
            display_exception(opstat)
            
        dftw.display_current_df_index(cfg.get_current_chapter_df(cfg.DataTransform_ID),
                                      cfg.get_current_chapter_dfc_df_title(cfg.DataTransform_ID))
        
        dftw.display_remote_df(cfg.DataTransform_ID)

                
    elif(optionid == dtm.PROCESS_RESET_DF_INDEX) :
        
        opstat = reset_df_index(parms)
        
        dftw.display_dataframe_indices_taskbar()
        print("\n")
            
        if(opstat.get_status()) :
            clear_dataframe_transform_cfg_values()
            display_status_note("df Index Reset Successfully")
        else :
            display_exception(opstat)
            
        dftw.display_current_df_index(cfg.get_current_chapter_df(cfg.DataTransform_ID),
                                      cfg.get_current_chapter_dfc_df_title(cfg.DataTransform_ID))
        
        dftw.display_remote_df(cfg.DataTransform_ID) 
            
    elif(optionid == dtm.PROCESS_APPEND_TO_INDEX) :
        
        opstat = append_to_df_index(parms)
        
        dftw.display_dataframe_indices_taskbar()
        print("\n")
            
        if(opstat.get_status()) :
            clear_dataframe_transform_cfg_values()
            display_status_note("df Index Appended to Successfully")
        else :
            dftw.display_dataframe_options([[4,0]])
            display_exception(opstat)
            
        dftw.display_current_df_index(cfg.get_current_chapter_df(cfg.DataTransform_ID),
                                      cfg.get_current_chapter_dfc_df_title(cfg.DataTransform_ID))
 
        dftw.display_remote_df(cfg.DataTransform_ID) 
            
    elif(optionid == dtm.PROCESS_SORT_DF_INDEX) :
        
        opstat = sort_df_index(parms)
        
        dftw.display_dataframe_indices_taskbar()
        print("\n")
            
        if(opstat.get_status()) :
            clear_dataframe_transform_cfg_values()
            display_status_note("Dataframe Sorted by index Successfully")
        else :
            display_exception(opstat)
        
        dftw.display_current_df_index(cfg.get_current_chapter_df(cfg.DataTransform_ID),
                                      cfg.get_current_chapter_dfc_df_title(cfg.DataTransform_ID))
        
        dftw.display_remote_df(cfg.DataTransform_ID) 

    # drop duplicate rows
    elif(optionid == dtm.PROCESS_SORT_COLUMN) :

        opstat = process_sort_by_column(parms,display)
        
        dftw.display_dataframe_transform_main()
        print("\n")
            
        if(opstat.get_status()) :
            clear_dataframe_transform_cfg_values()
            display_status_note(opstat.get_errorMsg())
        else :
            display_main_option([[0,0]])
            display_exception(opstat) 
            
    # drop duplicate rows
    elif(optionid == dtm.PROCESS_DROP_DUPLICATE_ROWS) :
        
        df = cfg.get_current_chapter_df(cfg.DataTransform_ID)
        
        start_rows  =   len(df)

        opstat = drop_duplicate_rows(parms,display)
        
        end_rows    =   len(df)
        
        dftw.display_dataframe_transform_main()
        print("\n")
            
        if(opstat.get_status()) :
            clear_dataframe_transform_cfg_values()
            display_status_note(str(start_rows-end_rows) + " Duplicate Rows Dropped Successfully")
        else :
            display_exception(opstat) 
    
    # return
    elif(optionid == dtm.DF_TRANSFORM_RETURN) :
        
        dftw.display_dataframe_transform_main()
        
    # help
    elif(optionid == dtm.DF_TRANSFORM_HELP) :
        print("help")
コード例 #10
0
def show_libs_info():
    """
    * -------------------------------------------------------------------------- 
    * function : display dfcleanser libs info
    * 
    * parms :
    *  N/A
    *
    * returns :
    *  html for the libraries table : tested vs installed versions,
    *  color coded (green = match, yellow = newer/unknown,
    *  red = older or not installed)
    * --------------------------------------------------------
    """

    import importlib

    def _installed_version(module_name):
        """Return the module's __version__ string, or "-1" if unavailable."""
        try:
            return str(importlib.import_module(module_name).__version__)
        except Exception:
            return "-1"

    def _version_key(version):
        """Split a dotted version string into comparable components
        so e.g. "3.10" compares greater than "3.9" (plain string
        comparison gets this wrong)."""
        parts = []
        for part in version.split("."):
            try:
                parts.append((0, int(part), ""))
            except ValueError:
                parts.append((1, 0, part))
        return parts

    clock = RunningClock()
    clock.start()

    libsHeader = [
        "Lib Name", "Tested</br>With</br>Version", "Installed</br>Version"
    ]
    libsRows = []
    libsWidths = [40, 30, 30]
    libsAligns = ["left", "center", "center"]

    testedModules = [
        "Python", "IPython", "ipywidgets", "ipykernel", "notebook", "pandas",
        "sklearn", "matplotlib", "numpy", "scipy", "json", "SQLAlchemy",
        "pymysql", "mysql-connector-python", "pyodbc", "pymssql", "SQLite3",
        "psycopg2", "cx-oracle", "geopy", "googlemaps", "arcgis"
    ]

    testedmoduleVersions = [
        "3.7.3", "7.4.0", "7.4.2", "5.1.0", "5.7.8", "0.24.2", "0.20.3",
        "3.0.3", "1.16.2", "1.2.1", "2.0.9", "1.3.1", "0.9.3", "8.0.16",
        "4.0.26", "2.1.4", "3.8.6", "2.8.2", "7.1.3", "1.19.0", "2.5.1",
        "1.6.1"
    ]

    installedmoduleVersions = [str(get_python_version())]

    # importable modules whose __version__ maps 1:1 onto the next
    # entries of testedModules (order matters)
    for module_name in [
            "IPython", "ipywidgets", "ipykernel", "notebook", "pandas",
            "sklearn", "matplotlib", "numpy", "scipy", "json", "sqlalchemy",
            "pymysql", "mysql.connector"
    ]:
        installedmoduleVersions.append(_installed_version(module_name))

    # pyodbc does not expose a usable version attribute
    installedmoduleVersions.append(str("unknown"))

    installedmoduleVersions.append(_installed_version("pymssql"))

    # sqlite3 reports the bundled library version, not a module version
    installedmoduleVersions.append(str("unknown"))

    # psycopg2 reports e.g. "2.8.2 (dt dec ...)" - keep only the number
    pgversion = _installed_version("psycopg2")
    found = pgversion.find("(")
    if (found > 0):
        installedmoduleVersions.append(pgversion[0:found - 1])
    else:
        installedmoduleVersions.append(pgversion)

    for module_name in ["cx_Oracle", "geopy", "googlemaps", "arcgis"]:
        installedmoduleVersions.append(_installed_version(module_name))

    for i in range(len(testedModules)):
        libsrow = []
        libsrow.append(str(testedModules[i]))
        libsrow.append(str(testedmoduleVersions[i]))
        if (installedmoduleVersions[i] == "-1"):
            installedmoduleVersions[i] = "not installed"

        libsrow.append(str(installedmoduleVersions[i]))

        libsRows.append(libsrow)

    colorList = []
    for i in range(len(testedModules)):
        installed = installedmoduleVersions[i]
        if (installed == "not installed"):
            colorRow = [sysm.Yellow, sysm.Yellow, sysm.Red]
        elif (installed == "unknown"):
            colorRow = [sysm.Green, sysm.Green, sysm.Yellow]
        elif (_version_key(testedmoduleVersions[i]) > _version_key(installed)):
            colorRow = [sysm.Red, sysm.Red, sysm.Red]
        elif (_version_key(testedmoduleVersions[i]) < _version_key(installed)):
            colorRow = [sysm.Yellow, sysm.Yellow, sysm.Yellow]
        else:
            colorRow = [sysm.Green, sysm.Green, sysm.Green]

        colorList.append(colorRow)

    libs_table = dcTable("Python Libraries", "dcmodsTable", cfg.System_ID,
                         libsHeader, libsRows, libsWidths, libsAligns)

    libs_table.set_tabletype(SIMPLE)
    libs_table.set_rowspertable(len(testedModules))
    libs_table.set_color(True)
    libs_table.set_colorList(colorList)
    libs_table.set_small(True)

    if (cfg.get_dfc_mode() == cfg.INLINE_MODE):
        libs_table.set_smallwidth(99)
        libs_table.set_smallmargin(2)
    else:
        libs_table.set_smallwidth(98)
        libs_table.set_smallmargin(2)

    libs_table.set_checkLength(False)
    libs_table.set_border(False)

    clock.stop()

    return (libs_table.get_html())