def test()->tuple:
    """
    Read the test dataset from PATH_TEST.
    return -- tuple(dataframe data, dictionary columns) as produced by csv2df
    """
    # tell the user that loading has started
    click.secho('Load data..', fg='green')
    # no target columns are requested for this file (empty ltarget);
    # 'PassengerId' is passed as lindex
    frame, dcol = csv2df(PATH_TEST, ltarget=[], lindex=['PassengerId'])
    return frame, dcol
def train()->tuple:
    """
    Read the training dataset from PATH_TRAIN.
    return -- tuple(dataframe data, dictionary columns) as produced by csv2df
    """
    # tell the user that loading has started
    click.secho('Load data..', fg='green')
    # 'Survived' is passed as ltarget and 'PassengerId' as lindex
    frame, dcol = csv2df(PATH_TRAIN, ltarget=['Survived'], lindex=['PassengerId'])
    return frame, dcol
def load()->tuple:
    """
    Read the weather dataset from PATH (without target variable).
    return -- tuple(dataframe data, dictionary columns) as produced by csv2df
    """
    # tell the user that loading has started
    click.secho('Load data..', fg='green')
    # 'dt' is passed as lindex; ddt names the 'dt' column together with a
    # timestamp format (presumably used by csv2df to parse it -- confirm there)
    frame, dcol = csv2df(
        PATH,
        lindex=['dt'],
        ddt={'lcol': ['dt'], 'sformat': '%Y-%m-%d %H:%M:%S'},
    )
    return frame, dcol
def load() -> tuple:
    """
    Read the wine dataset from PATH (without target variable).
    return -- tuple(dataframe data, columns object); the second item is the
              object built by columns() after its get(df) call, not the
              dictionary returned by csv2df
    """
    # tell the user that loading has started
    click.secho('Load data..', fg='green')
    # the column dictionary returned by csv2df is discarded: it is rebuilt
    # below once the dtypes have been changed
    df, _ = csv2df(PATH)
    # cast the selected columns to their final dtypes
    for name, dtype in (('Proline', float), ('Magnesium', float), ('Alcohol', int)):
        setattr(df, name, getattr(df, name).astype(dtype))
    # rebuild the column description from the re-typed dataframe
    col = columns()
    col.get(df)
    return df, col
def load() -> tuple:
    """
    Read the solar dataset from PATH.
    return -- tuple(dataframe data, columns object); the second item is the
              object built by columns() after its get(df, ['y', 'cy']) call
    """
    # tell the user that loading has started
    click.secho('Load data..', fg='green')
    # names handed over as targets, both to csv2df and to columns().get;
    # each call receives its own fresh list
    targets = ('y', 'cy')
    # 'dt' is passed as lindex; ddt names the 'dt' column together with a
    # timestamp format (presumably used by csv2df to parse it -- confirm there)
    df, _ = csv2df(
        PATH,
        ltarget=list(targets),
        lindex=['dt'],
        ddt={'lcol': ['dt'], 'sformat': '%Y-%m-%d %H:%M:%S'},
    )
    # cast 'cy' to int
    df.cy = df.cy.astype(int)
    # rebuild the column description from the re-typed dataframe
    col = columns()
    col.get(df, list(targets))
    return df, col
quit('Aborted!') # fit, transform and return return transformer.full_pipeline.fit_transform(df)


def numerical(df: 'dataframe')->'array':
    """
    Run the numerical-only pre-processing pipeline.
    df -- data to be transformed; the run is aborted when it holds NaN values.
    return -- result of transformer.num_pipeline.fit_transform(df)
    """
    # count the missing values per column once; the same counts are reused
    # for the check and for the report printed to the user
    nan_per_column = df.isnull().sum()
    if nan_per_column.sum() > 0:
        click.secho('[error] the dataframe to be transformated contains NaN values.', fg='red', bold=True)
        print(nan_per_column)
        quit('Aborted!')
    # no missing values: fit the pipeline and return the transformed data
    return transformer.num_pipeline.fit_transform(df)


if __name__ == '__main__':
    from tools import reader

    # read the weather csv file
    data, dcol = reader.csv2df('../../datasets/dataset.weather.csv', lindex=['datetime'])
    # sample: first name listed under 'lc_float' plus first name under 'lc_cat'
    sample_columns = dcol['lc_float'][:1] + dcol['lc_cat'][:1]
    dfX = data[sample_columns]
    # rows with missing values are dropped first because the pipeline aborts on NaN
    X = full(dfX.dropna().head())
    # show the first five columns of the transformed sample
    print(X[:, :5])