def missing_value_treatment(self, min_threshold):
    """Analyse and treat missing values in ``self.ds1_df``.

    Prints the features containing NAs, plots the per-feature
    missing-value percentage, applies direct imputation/dropping via
    ``f.impute_values`` controlled by ``min_threshold``, and reports how
    many rows still carry more than 30% NaN values afterwards.

    Parameters
    ----------
    min_threshold :
        Threshold forwarded to ``f.impute_values``; presumably the
        null-percentage cut-off used there — TODO confirm in FunctionLib.

    Returns
    -------
    None. ``self.ds1_df`` is replaced/mutated in place.
    """
    # Identify features that contain NA values.
    missing_value_feats = f.get_missing_value_feats(self.ds1_df)
    print(missing_value_feats)

    # Calculate missing-value percentage per feature and visualize it.
    missing_values_perc_df = f.missing_val_perc(missing_value_feats, self.ds1_df)
    val = missing_values_perc_df[0].sort_values(ascending=False)
    f.plot_bar(val.index, (50, 10), val)

    # Direct imputations: e.g. drop records for attributes with <5% nulls,
    # or drop attributes with >65% nulls (logic lives in f.impute_values).
    self.ds1_df = f.impute_values(self.ds1_df, missing_value_feats,
                                  min_threshold, action=True)
    # BUG FIX: reset_index returns a new frame; the original call discarded
    # the result, so the index was never actually reset.
    self.ds1_df = self.ds1_df.reset_index(drop=True)

    # Count rows having more than 30% NaN values.
    na_row_cnt = f.get_rowcnt_most_missing_val(self.ds1_df, 30)
    print('No of rows having more than 30% NA Values', na_row_cnt)

    # Re-identify features that still contain NA values after treatment.
    missing_value_feats = f.get_missing_value_feats(self.ds1_df)
    print(missing_value_feats)
def define_dataset(self):
    """Profile ``self.ds1_df`` and return its parameter summaries.

    Reports features with missing values, splits features into
    numerical/categorical, lets ``f.change_type`` adjust dtypes
    (count_threshold=5), re-splits after the dtype pass, and builds the
    per-feature parameter frames via ``f.get_params``.

    Returns
    -------
    tuple
        ``(par_num_df_start, par_cat_df_start)`` — numerical and
        categorical parameter DataFrames.

    NOTE(review): a later definition with the same name exists in this
    module and will shadow this one at class-creation time — verify which
    variant is intended to survive.
    """
    # Observe the features with missing values (printed/returned by helper).
    f.get_missing_value_feats(self.ds1_df)

    # Separate the categorical and numerical features.
    # (Removed dead-code expression `self.ds1_df.shape`, whose result
    # was discarded.)
    num_feats, cat_feats = self.seperate_cat_num_var(self.ds1_df)

    # Change the datatype of categorical and numerical values.
    f.change_type(self.ds1_df, num_feats, count_threshold=5)

    # Re-separate after dtype changes may have reclassified features.
    num_feats, cat_feats = self.seperate_cat_num_var(self.ds1_df)

    par_num_df_start, par_cat_df_start = f.get_params(
        self.ds1_df, num_feats, cat_feats)
    return par_num_df_start, par_cat_df_start
def define_dataset(self, df=None, ch_type=False, cnt_threshold=2):
    """Profile a dataset and return parameter summaries joined with stats.

    Parameters
    ----------
    df : DataFrame, optional
        Dataset to profile; defaults to ``self.ds1_df``.
    ch_type : bool, optional
        When True, let ``f.change_type`` adjust dtypes using
        ``cnt_threshold``.
    cnt_threshold : int, optional
        ``count_threshold`` forwarded to ``f.change_type``.

    Returns
    -------
    tuple
        ``(par_num_df_start, par_cat_df_start)`` — numerical and
        categorical parameter frames, each left-joined with
        ``f.feature_stats`` output.
    """
    # BUG FIX: the original used `if df == None`, which on a DataFrame is
    # an element-wise comparison and raises ValueError inside `if`
    # (truth value of a DataFrame is ambiguous). Identity check is the
    # correct and idiomatic test.
    if df is None:
        df = self.ds1_df

    # Observe the features with missing values.
    f.get_missing_value_feats(df)

    # Separate the categorical and numerical features.
    num_feats, cat_feats = self.seperate_cat_num_var(df)

    # Optionally change the datatype of categorical and numerical values.
    if ch_type:
        f.change_type(df, num_feats, count_threshold=cnt_threshold)

    # Build parameter frames and enrich them with per-feature statistics.
    par_num_df_start, par_cat_df_start = self.define_params(df)
    stats_df = f.feature_stats(df)
    par_num_df_start = par_num_df_start.join(stats_df, how='left')
    par_cat_df_start = par_cat_df_start.join(stats_df, how='left')
    return par_num_df_start, par_cat_df_start
def missing_value_imputations(self):
    """Impute all remaining missing values in ``self.ds1_df``.

    Numerical features are filled with their column mean (the comment in
    the original notes the distributions are symmetric at this stage, so
    mean ≈ median). Categorical features are filled with the mode when
    the mode covers more than 80% of values, otherwise with a KNN-based
    prediction from ``f.impute_knn_classifier``.

    Returns
    -------
    tuple
        ``(par_num_df, par_cat_df)`` parameter frames computed *before*
        the categorical imputation loop.
    """
    #################################### MISSING VALUES #############################
    num_feats_imp_df, cat_feats_imp_df = self.seperate_cat_num_var(
        self.ds1_df)

    # Numerical missing values: impute with the column mean.
    self.ds1_df[num_feats_imp_df] = self.ds1_df[num_feats_imp_df].fillna(
        value=self.ds1_df[num_feats_imp_df].mean())

    # Remaining missing values are categorical.
    missing_feats_cat = f.get_missing_value_feats(self.ds1_df)
    par_num_df, par_cat_df = f.get_params(self.ds1_df, num_feats_imp_df,
                                          cat_feats_imp_df)

    # Categorical values where mode frequency is more than 80%: impute NA
    # with the mode; otherwise use a KNN model to impute the values.
    mode_threshold = 80
    for feature in missing_feats_cat:
        if par_cat_df.loc[feature]['MODE_PERCENTAGE'] > mode_threshold:
            # FIX: `df[col].fillna(..., inplace=True)` operates on a
            # column selection (chained-assignment pattern) — unreliable
            # under pandas copy-on-write; assign the result back instead.
            self.ds1_df[feature] = self.ds1_df[feature].fillna(
                value=par_cat_df.loc[feature]['MODE'])
            print("Method : MODE , Feature : {} , Mode_Percentage : {}".
                  format(feature, par_cat_df.loc[feature]['MODE_PERCENTAGE']))
        else:
            imp_list, score = f.impute_knn_classifier(
                self.ds1_df, feature, 5)
            self.ds1_df[feature] = self.ds1_df[feature].fillna(value=imp_list)
            print(
                "Method : KNN , Feature : {} , Imputation Accuracy Score : {}"
                .format(feature, score))
    return par_num_df, par_cat_df
# Palette used by plotting helpers downstream.
color_list = ['green', 'blue', 'orange', 'yellow', 'red', 'violet', 'cyan']

# In[5]:

# Per-feature NA counts, largest first, rendered as a bar chart.
na_counts = x_df.isna().sum()
val = na_counts.sort_values(ascending=False)
f.plot_bar(val.index, (70, 10), val, 30)

# In[6]:

# List the features that contain missing values.
f.get_missing_value_feats(x_df)

# In[5]:

# Split features into numerical vs categorical; TARGET is the label and
# must not be treated as a plain numeric feature.
num_feats, cat_feats = f.distinct_feats(x_df)
print(len(num_feats), len(cat_feats))
num_feats.remove('TARGET')

# In[6]:

# Let the helper re-type low-cardinality numeric columns (threshold 10).
f.change_type(x_df, num_feats, 10)
# Import Libraries
# FIX: pd.read_csv below requires pandas, which was never imported in
# this chunk's import section.
import pandas as pd

import Model.FunctionLib as f

# Import working dataset.
# NOTE(review): `train_dataset` (the CSV path) is not defined in this
# chunk — presumably assigned earlier in the file; verify.
train_df = pd.read_csv(train_dataset)

# Create a new dataset same as train data: work on a 10% sample
# (fixed random_state for reproducibility) with a fresh index.
x_df = train_df.sample(frac=0.1, random_state=1).reset_index(drop=True)

# Delete the original dataset and work with the sample to free some
# space for processing.
del train_df

################################ CHANGING THE DATA TYPES ################################

# Observe the features with missing values.
f.get_missing_value_feats(x_df)

# Separate the categorical and numerical features; TARGET (label) and
# SK_ID_CURR (row identifier) are excluded from the numeric feature set.
num_feats, cat_feats = f.distinct_feats(x_df)
print(len(num_feats), len(cat_feats))
num_feats.remove('TARGET')
num_feats.remove('SK_ID_CURR')

# Change the datatype of categorical and numerical values.
f.change_type(x_df, num_feats, count_threshold=5)

# Re-separate after the dtype pass — change_type may have reclassified
# some features — and exclude the same non-feature columns again.
num_feats, cat_feats = f.distinct_feats(x_df)
print(len(num_feats), len(cat_feats))
num_feats.remove('TARGET')
num_feats.remove('SK_ID_CURR')