Example #1
0
def make_si(feat: pd.DataFrame, cols: list, transform: bool = True):
    """Impute missing values in `cols` of `feat` with the constant "Other".

    Returns the (possibly transformed) frame together with the fitted
    SimpleImputer so it can be re-applied to other data.
    """

    # Imports
    import numpy as np
    from sklearn.impute import SimpleImputer
    from src.utils import assertions as a

    # Normalise a single column name to a one-element list
    if isinstance(cols, str):
        cols = [cols]

    # Assertions
    assert a.all_dataframe(feat)
    assert a.all_str(cols)
    assert a.all_in(cols, feat.columns)
    assert a.all_bool(transform)

    # Imputer that replaces NaN with the literal string "Other"
    imputer = SimpleImputer(
        missing_values=np.nan, strategy="constant", fill_value="Other"
    )

    # Always fit; apply to the frame only when requested
    if transform:
        feat[cols] = imputer.fit_transform(feat[cols])
    else:
        imputer.fit(feat[cols])

    # Return
    return feat, imputer
Example #2
0
def make_ohe(feat: pd.DataFrame, cols: list, transform: bool = True):
    """One-hot encode `cols` of `feat`.

    When `transform` is True the encoded columns replace the originals in
    `feat`; otherwise the encoder is only fitted. Returns the frame and the
    fitted OneHotEncoder.
    """

    # Imports
    from sklearn.preprocessing import OneHotEncoder
    from src.utils import assertions as a

    # Assertions
    assert a.all_dataframe(feat)
    # Normalise a single column name BEFORE validating list contents,
    # matching the convention used by make_si().
    if isinstance(cols, str): cols = [cols]
    assert a.all_str(cols)
    assert a.all_in(cols, feat.columns)
    assert a.all_bool(transform)

    # Instantiations
    # NOTE(review): `sparse=` was renamed `sparse_output=` in sklearn>=1.2 and
    # `get_feature_names` became `get_feature_names_out`; update both together
    # when the project's sklearn is upgraded.
    ohe = OneHotEncoder(sparse=False)

    # Do work
    data = feat[cols]
    if transform:
        data = ohe.fit_transform(data)
        data = pd.DataFrame(data)
        data.columns = ohe.get_feature_names(cols)
        # BUG FIX: fit_transform returns a plain array, so the new frame gets
        # a fresh RangeIndex; without re-aligning, the concat below misaligns
        # rows whenever `feat` has a non-default index.
        data.index = feat.index
        feat.drop(cols, axis=1, inplace=True)
        feat = pd.concat([feat, data], axis=1)
    else:
        ohe.fit(data)

    # Return
    return feat, ohe
Example #3
0
def scale_features(feat: pd.DataFrame,
                   cols: list = None,
                   transform: bool = True):
    """Standard-scale `cols` of `feat` (every column when `cols` is None).

    Returns the (possibly transformed) frame and the fitted StandardScaler.
    """

    # Imports
    from sklearn.preprocessing import StandardScaler
    from src.utils import assertions as a

    # Assertions
    assert a.all_dataframe(feat)
    if cols:
        # Accept a single column name as well as a list of names
        cols = [cols] if isinstance(cols, str) else cols
        assert a.all_str(cols)
        assert a.all_in(cols, feat.columns)
    else:
        # No selection given: scale the whole frame
        cols = feat.columns
    assert a.all_bool(transform)

    # Instantiations
    scaler = StandardScaler()

    # Always fit; apply to the frame only when requested
    if transform:
        feat[cols] = scaler.fit_transform(feat[cols])
    else:
        scaler.fit(feat[cols])

    # Return
    return feat, scaler
Example #4
0
def plot_network_training(metrics: dict):
    """Redraw live training curves (accuracy + loss, train vs validation).

    Expects `metrics` to hold equal-length lists under the keys
    "accu_trn", "loss_trn", "accu_val" and "loss_val". Does nothing before
    the second epoch; otherwise clears the previous notebook output and
    shows a fresh two-panel figure.
    """

    # Imports
    import matplotlib.pyplot as plt
    import numpy as np
    from IPython.display import clear_output
    from src.utils import assertions as a

    # Assertions
    assert isinstance(metrics, dict)
    assert a.all_in(["accu_trn", "loss_trn", "accu_val", "loss_val"], list(metrics.keys()))

    # Nothing worth plotting until at least two epochs have been recorded
    epoch = len(next(iter(metrics.values())))
    if epoch < 2:
        return None

    # Replace the previously drawn chart in the notebook output
    clear_output(wait=True)

    # X axis: one tick per completed epoch
    xs = np.arange(1, epoch + 1)

    plt.figure(figsize=(8, 8))

    # Accuracy panel
    plt.subplot(2, 1, 1)
    plt.plot(xs, metrics.get("accu_trn"), label="Training Accuracy")
    plt.plot(xs, metrics.get("accu_val"), label="Validation Accuracy")
    plt.legend(loc="best")
    plt.title("Accuracy [Epoch {}]".format(epoch))
    plt.ylim([0, 1.1])
    plt.ylabel("Accuracy")

    # Loss panel
    plt.subplot(2, 1, 2)
    plt.plot(xs, metrics.get("loss_trn"), label="Training Loss")
    plt.plot(xs, metrics.get("loss_val"), label="Validation Loss")
    plt.legend(loc="best")
    plt.title("Loss [Epoch {}]".format(epoch))
    plt.ylabel("Loss")
    plt.xlabel("Epoch #")

    # Show
    plt.show()
Example #5
0
def sel_feat_cols(data: pd.DataFrame, feats: list):
    """Return `data` restricted to the columns named in `feats`.

    A single column name is normalised to a one-element list so the result
    is always a DataFrame — consistent with make_si/make_ohe/scale_features.
    (Previously a bare string slipped through the asserts and returned a
    Series instead of a DataFrame.)
    """

    # Imports
    from src.utils import assertions as a

    # Assertions
    assert a.all_dataframe(data)
    assert isinstance(feats, (str, list))
    if isinstance(feats, str): feats = [feats]
    assert a.all_str(feats)
    assert a.all_in(feats, data.columns)

    # Do work
    data = data[feats]

    return data
Example #6
0
def rem_features(data: pd.DataFrame, feats: list):
    """Drop the columns named in `feats` from `data` and return the result.

    `feats` may be a single column name or a list of names; the input frame
    is not mutated (`drop` returns a new frame).
    """

    # Imports
    from src.utils import assertions as a

    # Assertions
    assert a.all_dataframe(data)
    # BUG FIX: was `assert a.all_str(feats, (str, list))` — a copy/paste of
    # the isinstance check from sel_feat_cols that passed the type tuple to
    # the wrong function.
    assert isinstance(feats, (str, list))
    assert a.all_str(feats)
    assert a.all_in(feats, data.columns)

    # Do work
    data = data.drop(columns=feats)

    # Return
    return data
Example #7
0
def make_le(feat: pd.DataFrame, cols: list, transform: bool = True):
    """Label-encode `cols` of `feat`.

    Returns the (possibly transformed) frame and the LabelEncoder.
    NOTE: when several columns are given, each is encoded independently and
    the returned encoder holds the fit of the LAST column only.
    """

    # Imports
    from sklearn.preprocessing import LabelEncoder
    from src.utils import assertions as a

    # Assertions
    assert a.all_dataframe(feat)
    # Normalise a single column name BEFORE validating list contents,
    # matching the convention used by make_si().
    if isinstance(cols, str): cols = [cols]
    assert a.all_str(cols)
    assert a.all_in(cols, feat.columns)
    assert a.all_bool(transform)

    # Instantiations
    le = LabelEncoder()

    # Do work
    # BUG FIX: LabelEncoder only accepts 1-D input, so encode one column at
    # a time; passing a multi-column frame raised ValueError inside sklearn.
    for col in cols:
        if transform:
            feat[col] = le.fit_transform(feat[col])
        else:
            le.fit(feat[col])

    # Return
    return feat, le
Example #8
0
def train_overall_network \
    ( feat_trn:np.real
    , targ_trn:np.real
    , feat_val:np.real
    , targ_val:np.real
    , hidden_shapes:list=[20,20,20]
    , hidden_acti:str="relu"
    , final_shape:int=1
    , final_acti:str="sigmoid"
    , batch_size:int=100
    , epochs:int=500
    , learning_rate:float=0.001
    , device:torch.device=get_device()
    , scheduler:bool=True
    , verbosity:int=10
    , plot_learning:bool=True
    ):
    """Train a `Net` classifier on the given train/validation arrays.

    Runs `epochs` passes of model_train/model_validate, records per-epoch
    loss/accuracy in a dict, optionally steps a ReduceLROnPlateau scheduler
    on the validation loss, and (every `verbosity` epochs) replots the
    learning curves. Returns the trained model.
    """

    # Imports
    import numpy as np
    from src.utils import assertions as a
    from src.models.pytorch import PyTorchDataset
    from torch import nn, optim
    from src.models.pytorch import Net

    # Assertions
    assert a.all_real([feat_trn, targ_trn, feat_val, targ_val])
    assert isinstance(hidden_shapes, list)
    assert len(hidden_shapes)>0, "Must have at least 1 hidden layer"
    # BUG FIX: was `a.all_in(hidden_shapes)` (missing second argument);
    # the intent is that every hidden layer size is an integer.
    assert a.all_int(hidden_shapes)
    assert a.all_scalar([hidden_acti, final_shape, final_acti, batch_size, epochs, learning_rate])
    assert isinstance(verbosity, (int, type(None)))
    # BUG FIX: verbosity may legitimately be None (allowed just above), so it
    # must not also be forced through all_int.
    assert a.all_int([batch_size, epochs])
    assert a.all_str([hidden_acti, final_acti])
    assert a.all_float(learning_rate)

    # Initialise data generators
    data_trn = PyTorchDataset(feat_trn, targ_trn)
    data_val = PyTorchDataset(feat_val, targ_val)

    # Initialise classes
    modl = Net(feat_trn.shape[1], len(set(targ_trn)))
    crit = nn.CrossEntropyLoss()
    optm = optim.Adam(modl.parameters(), lr=learning_rate)
    # `scheduler` arrives as a bool flag and is rebound to the scheduler
    # object when enabled (stays False otherwise).
    if scheduler:
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer=optm, mode="min", patience=3)

    # Push network to device
    modl.to(device)

    # Set dumping ground for per-epoch metrics
    costs = {"epoch": [], "loss_trn": [], "accu_trn": [], "loss_val": [], "accu_val": []}

    # Loop over epochs
    for epoch in range(epochs):

        loss_trn, accu_trn = model_train \
            ( data_trn=data_trn
            , modl=modl
            , crit=crit
            , optm=optm
            , batch_size=batch_size
            , hidden_shapes=hidden_shapes
            , hidden_acti=hidden_acti
            , final_shape=final_shape
            , final_acti=final_acti
            , device=device
            , scheduler=scheduler
            )

        loss_val, accu_val = model_validate \
            ( data_val=data_val
            , modl=modl
            , crit=crit
            , batch_size=batch_size
            , hidden_shapes=hidden_shapes
            , hidden_acti=hidden_acti
            , final_shape=final_shape
            , final_acti=final_acti
            , device=device
            )

        # Record progress
        costs["epoch"].append(epoch+1)
        costs["loss_trn"].append(loss_trn)
        costs["accu_trn"].append(accu_trn)
        costs["loss_val"].append(loss_val)
        costs["accu_val"].append(accu_val)

        # Adjust scheduler
        if scheduler:
            # BUG FIX: ReduceLROnPlateau.step() requires the monitored
            # metric; calling it without one raises TypeError.
            scheduler.step(loss_val)

        # Print stats every `verbosity` epochs (and on the final epoch)
        if verbosity:
            if epoch % verbosity == 0 or epoch+1==epochs:
                # Plot learning
                if plot_learning:
                    plot_network_training(costs)

    # Return
    return modl