Esempio n. 1
0
def make_si(feat: pd.DataFrame, cols: list, transform: bool = True):
    """
    Fit a constant-fill `SimpleImputer` on selected columns of a data set.

    Missing values (`np.nan`) are replaced by the string `"Other"`.

    Args:
        feat (pd.DataFrame): The data set holding the columns to impute.
        cols (list): Name(s) of the columns to impute. A single `str` is wrapped into a one-element list. Every name must exist in `feat`.
        transform (bool, optional): If `True`, fit the imputer and write the imputed values back into `feat[cols]`. If `False`, only fit. Defaults to `True`.

    Returns:
        feat (pd.DataFrame): The same `feat` object (modified in place when `transform` is `True`).
        si (SimpleImputer): The fitted imputer.
    """

    # Imports
    from src.utils import assertions as a
    from sklearn.impute import SimpleImputer
    import numpy as np

    # Validate inputs; a lone column name is normalised to a list first
    assert a.all_dataframe(feat)
    cols = [cols] if isinstance(cols, str) else cols
    assert a.all_str(cols)
    assert a.all_in(cols, feat.columns)
    assert a.all_bool(transform)

    # Build the imputer
    imputer = SimpleImputer(
        missing_values=np.nan,
        strategy="constant",
        fill_value="Other",
    )

    # Fit-only path: leave the data untouched
    if not transform:
        imputer.fit(feat[cols])
        return feat, imputer

    # Fit and overwrite the selected columns with the imputed values
    feat[cols] = imputer.fit_transform(feat[cols])
    return feat, imputer
Esempio n. 2
0
def make_ohe(feat: pd.DataFrame, cols: list, transform: bool = True):
    """
    One-hot encode selected columns of a data set.

    Args:
        feat (pd.DataFrame): The data set holding the columns to encode.
        cols (list): Name(s) of the columns to encode. A single `str` is wrapped into a one-element list. Every name must exist in `feat`.
        transform (bool, optional): If `True`, fit the encoder and replace `cols` with the encoded indicator columns. If `False`, only fit. Defaults to `True`.

    Returns:
        feat (pd.DataFrame): When `transform` is `True`, a new frame made of the remaining columns followed by the encoded columns; otherwise the unchanged input.
        ohe (OneHotEncoder): The fitted encoder.

    Notes:
        When `transform` is `True` the `cols` columns are dropped from the passed-in `feat` object in place; the encoded columns only exist on the returned frame.
    """

    # Imports
    from src.utils import assertions as a
    from sklearn.preprocessing import OneHotEncoder

    # Assertions (normalise a lone column name before validating the list)
    assert a.all_dataframe(feat)
    if isinstance(cols, str): cols = [cols]
    assert a.all_str(cols)
    assert a.all_in(cols, feat.columns)
    assert a.all_bool(transform)

    # Instantiations
    # Newer scikit-learn names the dense-output switch `sparse_output`;
    # older releases only know `sparse`, and reject the new name with TypeError.
    try:
        ohe = OneHotEncoder(sparse_output=False)
    except TypeError:
        ohe = OneHotEncoder(sparse=False)

    # Do work
    data = feat[cols]
    if transform:
        encoded = ohe.fit_transform(data)

        # Prefer the current feature-name API, fall back to the legacy one
        if hasattr(ohe, "get_feature_names_out"):
            names = ohe.get_feature_names_out(cols)
        else:
            names = ohe.get_feature_names(cols)

        # Reuse the original index so the concat below lines rows up
        # correctly even when `feat` does not carry a default RangeIndex.
        encoded = pd.DataFrame(encoded, index=feat.index, columns=names)

        feat.drop(cols, axis=1, inplace=True)
        feat = pd.concat([feat, encoded], axis=1)
    else:
        ohe.fit(data)

    # Return
    return feat, ohe
Esempio n. 3
0
def scale_features(feat: pd.DataFrame,
                   cols: list = None,
                   transform: bool = True):
    """
    Fit a `StandardScaler` on selected columns of a data set.

    Args:
        feat (pd.DataFrame): The data set holding the columns to scale.
        cols (list, optional): Name(s) of the columns to scale. A single `str` is wrapped into a one-element list. When falsy (`None` or empty), every column of `feat` is used. Defaults to `None`.
        transform (bool, optional): If `True`, fit the scaler and write the scaled values back into `feat`. If `False`, only fit. Defaults to `True`.

    Returns:
        feat (pd.DataFrame): The same `feat` object (modified in place when `transform` is `True`).
        sc (StandardScaler): The fitted scaler.
    """

    # Imports
    from src.utils import assertions as a
    from sklearn.preprocessing import StandardScaler

    # Validate inputs; column checks only apply when columns were supplied
    assert a.all_dataframe(feat)
    if cols:
        cols = [cols] if isinstance(cols, str) else cols
        assert a.all_str(cols)
        assert a.all_in(cols, feat.columns)
    assert a.all_bool(transform)

    # Fall back to every column when none were requested
    selected = cols if cols else feat.columns

    scaler = StandardScaler()

    # Fit-only path: leave the data untouched
    if not transform:
        scaler.fit(feat[selected])
        return feat, scaler

    # Fit and overwrite the selected columns with the scaled values
    feat[selected] = scaler.fit_transform(feat[selected])
    return feat, scaler
Esempio n. 4
0
def pop_target(data:pd.DataFrame, targ:str):
    """
    Split a data set into its feature columns and its target column.

    The input `data` object is not modified; both results are new objects.

    Args:
        data (pd.DataFrame): The data set containing both the features and the target.
        targ (str): The name of the target column. Must be an existing column in `data`.

    Returns:
        feat (pd.DataFrame): `data` without the `targ` column.
        targ (pd.DataFrame): A single-column frame holding only the `targ` column.
    """

    # Imports
    from src.utils import assertions as a

    # Assertions
    assert a.all_dataframe(data)
    assert a.all_str(targ)
    assert targ in data.columns

    # Do work
    # Double-bracket selection keeps the target as a one-column DataFrame
    target_frame = data[[targ]]
    features = data.drop(columns=[targ])

    # Return
    return features, target_frame
Esempio n. 5
0
def sel_feat_cols(data:pd.DataFrame, feats:list):
    """
    Select a subset of columns from a data set.

    Args:
        data (pd.DataFrame): The data set to select from.
        feats (list): Name(s) of the columns to keep; a `str` or a `list` of `str`. Every name must exist in `data`.

    Returns:
        The result of indexing `data` with `feats`.
    """

    # Imports
    from src.utils import assertions as checks

    # Assertions
    assert checks.all_dataframe(data)
    assert isinstance(feats, (str, list))
    assert checks.all_str(feats)
    assert checks.all_in(feats, data.columns)

    # Do work & return
    return data[feats]
Esempio n. 6
0
def rem_features(data:pd.DataFrame, feats:list):
    """
    Remove one or more columns from a data set.

    The input `data` object is not modified; a new frame is returned.

    Args:
        data (pd.DataFrame): The data set to remove columns from.
        feats (list): Name(s) of the columns to remove; a `str` or a `list` of `str`. Every name must exist in `data`.

    Returns:
        data (pd.DataFrame): A new frame without the `feats` columns.
    """
    
    # Imports
    from src.utils import assertions as a
    
    # Assertions
    assert a.all_dataframe(data)
    # Container type check, mirroring `sel_feat_cols`
    assert isinstance(feats, (str, list))
    assert a.all_str(feats)
    assert a.all_in(feats, data.columns)
    
    # Do work
    data = data.drop(columns=feats)
    
    # Return
    return data
Esempio n. 7
0
def make_le(feat: pd.DataFrame, cols: list, transform: bool = True):
    """
    Label-encode a single column of a data set.

    `LabelEncoder` works on one-dimensional input, so exactly one column
    must be supplied.

    Args:
        feat (pd.DataFrame): The data set holding the column to encode.
        cols (list): The column to encode, given as a `str` or a one-element `list` of `str`. The name must exist in `feat`.
        transform (bool, optional): If `True`, fit the encoder and overwrite the column with its integer codes. If `False`, only fit. Defaults to `True`.

    Raises:
        ValueError: If `cols` does not name exactly one column.

    Returns:
        feat (pd.DataFrame): The same `feat` object (modified in place when `transform` is `True`).
        le (LabelEncoder): The fitted encoder.
    """

    # Imports
    from src.utils import assertions as a
    from sklearn.preprocessing import LabelEncoder

    # Assertions (normalise a lone column name before validating the list)
    assert a.all_dataframe(feat)
    if isinstance(cols, str): cols = [cols]
    assert a.all_str(cols)
    assert a.all_in(cols, feat.columns)
    assert a.all_bool(transform)

    # A single encoder can only hold the classes of one column
    if len(cols) != 1:
        raise ValueError(
            "make_le encodes exactly one column, got {}".format(len(cols))
        )
    col = cols[0]

    # Instantiations
    le = LabelEncoder()

    # Do work
    # Pass the column as a 1-D Series: the encoder returns a 1-D array, which
    # can be assigned to a single column label but not to a list of labels.
    if transform:
        feat[col] = le.fit_transform(feat[col])
    else:
        le.fit(feat[col])

    # Return
    return feat, le
Esempio n. 8
0
def encode_features(feat: pd.DataFrame,
                    cols=list,
                    type: str = "ordinal",
                    transform: bool = True):
    """
    Dispatch to one of the encoder builders based on a type alias.

    Args:
        feat (pd.DataFrame): The data set holding the columns to encode.
        cols (str or list): Name(s) of the columns to encode.
        type (str, optional): Which encoder to build. Matching is exact (case-sensitive). Defaults to `"ordinal"`.
            - ordinal: `"oe"`, `"ord"`, `"ordinal"`, `"ordinalencoder"`, `"ordinal encoder"`
            - one-hot: `"ohe"`, `"one"`, `"onehotencoder"`, `"one hot encoder"`
            - label: `"le"`, `"label"`, `"label encoder"`, `"labelencoder"`
        transform (bool, optional): Forwarded unchanged to the selected builder. Defaults to `True`.

    Returns:
        Whatever the selected builder returns, or `(feat, None)` when `type` matches no alias.
    """

    # NOTE(review): `cols=list` makes the builtin `list` type the default value;
    # omitting `cols` therefore fails the isinstance assertion below. This looks
    # like a typo for an annotation -- confirm before changing the signature.

    # Imports
    # NOTE(review): `make_le` is not imported here, so the label branch relies
    # on it being resolvable from the enclosing module -- verify.
    from src.utils import assertions as a
    from src.data.prep_data import make_oe, make_ohe

    # Assertions
    assert a.all_dataframe(feat)
    assert isinstance(cols, (str, list))
    assert a.all_str(cols)
    assert a.all_str(type)

    # Accepted spellings for each encoder family
    ordinal_aliases = ("oe", "ord", "ordinal", "ordinalencoder", "ordinal encoder")
    onehot_aliases = ("ohe", "one", "onehotencoder", "one hot encoder")
    label_aliases = ("le", "label", "label encoder", "labelencoder")

    # Do work & return
    if type in ordinal_aliases:
        return make_oe(feat=feat, cols=cols, transform=transform)
    if type in onehot_aliases:
        return make_ohe(feat=feat, cols=cols, transform=transform)
    if type in label_aliases:
        return make_le(feat=feat, cols=cols, transform=transform)

    # Unknown alias: hand the data back untouched, with no encoder
    return feat, None