Esempio n. 1
0
def convert_dataframe(df, to='pandas', return_library=True, dtypes=None):
    '''Converts a dataframe to the desired dataframe library format.

    The data is rebuilt from `df.to_numpy()` and only the column labels are
    carried over, so the original index is replaced by the default one of
    the new dataframe.

    Parameters
    ----------
    df : pandas.DataFrame or dask.DataFrame or modin.pandas.DataFrame
        Original dataframe which will be converted. It must expose
        `to_numpy()` and `columns`.
    to : string, default 'pandas'
        The data library to which format the dataframe will be converted to.
        Compared case-insensitively; supported values are 'pandas' and
        'modin'.
    return_library : bool, default True
        If truthy, the new dataframe library is also returned as an output.
    dtypes : dict, default None
        Dictionary that indicates the desired dtype for each column.
        e.g. {'Var1': 'float64', 'Var2': 'UInt8', 'Var3': str}
        If None, the dtypes are inferred with `infer_objects()`.

    Returns
    -------
    df : pandas.DataFrame or modin.pandas.dataframe.DataFrame
        Converted dataframe, in the desired type.

    If return_library is truthy:

    new_pd : pandas or modin.pandas
        The dataframe library to which the input dataframe is converted to.

    Raises
    ------
    ValueError
        If `to` is neither 'pandas' nor 'modin'.
    '''
    lib = str(to).lower()
    # Import lazily so that modin is only required when it is actually requested
    if lib == 'pandas':
        import pandas as new_pd
    elif lib == 'modin':
        import modin.pandas as new_pd
    else:
        # ValueError subclasses Exception, so existing `except Exception`
        # handlers keep working
        raise ValueError(
            f'ERROR: Currently, conversion to a dataframe of type {to} is not supported. Available options are "pandas" and "modin".'
        )
    # Rebuild the dataframe in the target library from the raw values
    converted_df = new_pd.DataFrame(data=df.to_numpy(), columns=df.columns)
    # Keep the `du` module's configured dataframe library in sync with the
    # returned dataframe (`du` is resolved from the enclosing module's scope)
    du.set_pandas_library(lib)
    if dtypes is None:
        # Infer adequate dtypes for the dataframe's columns
        converted_df = converted_df.infer_objects()
    else:
        # Set the desired dtypes
        converted_df = convert_dtypes(converted_df,
                                      dtypes=dtypes,
                                      inplace=True)
    # Truthiness check, so that flags such as numpy.bool_(True) also work
    if return_library:
        return converted_df, new_pd
    return converted_df
import numpy as np                         # Mathematical operations package, allowing also for missing values representation
import torch                               # PyTorch for tensor and deep learning operations
import plotly.graph_objs as go             # Plotly for interactive and pretty plots
import data_utils as du                    # Data science and machine learning relevant methods
from model_interpreter.model_interpreter import ModelInterpreter  # Model interpretability class
import shap                                # Model-agnostic interpretability package inspired on Shapley values
from datetime import datetime              # datetime to use proper date and time formats
import pickle                              # Save and load Python objects

# Notebook-style setup: fix the random seed, select the dataframe library and
# import the project's model classes from the scripts directory.

# Bare attribute access; presumably a notebook cell that displayed the seed
# before it is changed (as a plain script statement it shows nothing)
du.random_seed

# Set the random seed to a fixed value
du.set_random_seed(42)

# Presumably displays the seed again, to confirm the change in a notebook
du.random_seed

# Select pandas as the dataframe library used by data_utils
du.set_pandas_library(lib='pandas')

import pixiedust                           # Debugging in Jupyter Notebook cells

# Change to scripts directory
# NOTE(review): `os` is not imported in the visible lines of this example;
# confirm that `import os` exists earlier in the file
os.chdir('../../scripts')

# Must run while the working directory is the scripts directory; presumably
# relies on the current directory being on the import path - TODO confirm
import Models                              # Script with all the machine learning model classes

# Change to parent directory (presumably "eICU-mortality-prediction")
os.chdir('..')

# ## Initializing variables

# Comet ML settings:
Esempio n. 3
0
import pandas as pd                        # Pandas to load the data initially
import numpy as np                         # Mathematical operations package, allowing also for missing values representation
import torch                               # PyTorch for tensor and deep learning operations
import data_utils as du                    # Data science and machine learning relevant methods
import os                                  # os handles directory/workspace changes

# Notebook-style setup: fix the random seed, select the dataframe library and
# import the project's dataset and model classes from the scripts directory.

# Bare attribute access; presumably a notebook cell that displayed the seed
# before it is changed (as a plain script statement it shows nothing)
du.random_seed

# Set the random seed to a fixed value
du.set_random_seed(42)

# Presumably displays the seed again, to confirm the change in a notebook
du.random_seed

# Presumably displays whether modin is in use before selecting the library
du.use_modin

# Select pandas as the dataframe library used by data_utils
du.set_pandas_library('pandas')

# Presumably displays the flag again, to confirm the selection in a notebook
du.use_modin

import pixiedust                           # Debugging in Jupyter Notebook cells

# Change to scripts directory
os.chdir('../../scripts')

# These imports must run while the working directory is the scripts
# directory; presumably they rely on the current directory being on the
# import path - TODO confirm
from Tabular_Dataset import Tabular_Dataset # Dataset class that helps fetching batches of data
import Models                              # Script with all the machine learning model classes

# Change to parent directory (presumably "eICU-mortality-prediction")
os.chdir('..')

# ## Initializing variables